Merge branch 'master' into win-pthreads

This commit is contained in:
Devin Matthews
2018-10-16 10:12:25 -05:00
committed by GitHub
97 changed files with 16468 additions and 1493 deletions

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -55,11 +56,9 @@ double bli_clock_min_diff( double time_min, double time_start )
// Assume that anything:
// - under or equal to zero,
// - over an hour, or
// - under a nanosecond
// is actually garbled due to the clocks being taken too closely together.
if ( time_min <= 0.0 ) time_min = time_min_prev;
else if ( time_min > 3600.0 ) time_min = time_min_prev;
else if ( time_min < 1.0e-9 ) time_min = time_min_prev;
return time_min;

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -94,6 +95,60 @@ gint_t bli_info_get_enable_packbuf_pools( void )
return 0;
#endif
}
// Reports whether any multithreading backend was compiled in.
// Returns 1 if the OpenMP query or the pthreads query returns nonzero,
// and 0 otherwise. The OpenMP query is consulted first; the pthreads
// query is only consulted when OpenMP is not enabled.
gint_t bli_info_get_enable_threading( void )
{
	if ( bli_info_get_enable_openmp()   ) return 1;
	if ( bli_info_get_enable_pthreads() ) return 1;

	return 0;
}
// Returns 1 if BLIS_ENABLE_OPENMP was defined when this file was compiled,
// and 0 otherwise.
gint_t bli_info_get_enable_openmp( void )
{
#if defined(BLIS_ENABLE_OPENMP)
	const gint_t is_enabled = 1;
#else
	const gint_t is_enabled = 0;
#endif

	return is_enabled;
}
// Returns 1 if BLIS_ENABLE_PTHREADS was defined when this file was compiled,
// and 0 otherwise.
gint_t bli_info_get_enable_pthreads( void )
{
#if defined(BLIS_ENABLE_PTHREADS)
	const gint_t is_enabled = 1;
#else
	const gint_t is_enabled = 0;
#endif

	return is_enabled;
}
// Returns 1 if BLIS_ENABLE_JRIR_SLAB was defined when this file was compiled,
// and 0 otherwise.
gint_t bli_info_get_thread_part_jrir_slab( void )
{
#if defined(BLIS_ENABLE_JRIR_SLAB)
	const gint_t is_enabled = 1;
#else
	const gint_t is_enabled = 0;
#endif

	return is_enabled;
}
// Returns 1 if BLIS_ENABLE_JRIR_RR was defined when this file was compiled,
// and 0 otherwise.
gint_t bli_info_get_thread_part_jrir_rr( void )
{
#if defined(BLIS_ENABLE_JRIR_RR)
	const gint_t is_enabled = 1;
#else
	const gint_t is_enabled = 0;
#endif

	return is_enabled;
}
// Returns 1 if BLIS_ENABLE_MEMKIND was defined when this file was compiled,
// and 0 otherwise.
gint_t bli_info_get_enable_memkind( void )
{
#if defined(BLIS_ENABLE_MEMKIND)
	const gint_t is_enabled = 1;
#else
	const gint_t is_enabled = 0;
#endif

	return is_enabled;
}
// Returns 1 if BLIS_ENABLE_SANDBOX was defined when this file was compiled,
// and 0 otherwise.
gint_t bli_info_get_enable_sandbox( void )
{
#if defined(BLIS_ENABLE_SANDBOX)
	const gint_t is_enabled = 1;
#else
	const gint_t is_enabled = 0;
#endif

	return is_enabled;
}

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -58,6 +59,13 @@ gint_t bli_info_get_enable_blas( void );
gint_t bli_info_get_enable_cblas( void );
gint_t bli_info_get_blas_int_type_size( void );
gint_t bli_info_get_enable_packbuf_pools( void );
// Returns 1 if either the OpenMP or the pthreads query below returns nonzero,
// and 0 otherwise.
gint_t bli_info_get_enable_threading( void );
// Each of the following queries returns 1 if its corresponding compile-time
// macro (BLIS_ENABLE_OPENMP, BLIS_ENABLE_PTHREADS, BLIS_ENABLE_JRIR_SLAB,
// BLIS_ENABLE_JRIR_RR, BLIS_ENABLE_MEMKIND, BLIS_ENABLE_SANDBOX) was defined
// when the library was compiled, and 0 otherwise.
gint_t bli_info_get_enable_openmp( void );
gint_t bli_info_get_enable_pthreads( void );
gint_t bli_info_get_thread_part_jrir_slab( void );
gint_t bli_info_get_thread_part_jrir_rr( void );
gint_t bli_info_get_enable_memkind( void );
gint_t bli_info_get_enable_sandbox( void );
// -- Kernel implementation-related --------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -45,7 +46,7 @@ void bli_prune_unref_mparts( obj_t* p, mdim_t mdim_p,
// matrix is empty. This is not strictly needed but rather a minor
// optimization, as it would prevent threads that would otherwise get
// subproblems on BLIS_ZEROS operands from calling the macro-kernel,
// because bli_thread_get_range*() would return empty ranges, which would
// because bli_thread_range*() would return empty ranges, which would
// cause the variant's for loop from executing any iterations.
// NOTE: this should only ever execute if the primary object is
// triangular because that is the only structure type with subpartitions

View File

@@ -101,16 +101,16 @@ bli_rntm_print( rntm );
}
else if ( l3_op == BLIS_TRSM )
{
// For trsm_l, we extract all parallelism from the jr loop, and
// for trsm_r, we extract all parallelism from the ic loop.
// For trsm_l, we extract all parallelism from the jc and jr loops.
// For trsm_r, we extract all parallelism from the ic loop.
if ( bli_is_left( side ) )
{
bli_rntm_set_ways_only
(
jc,
1,
1,
1,
ic * pc * jc * jr * ir,
ic * pc * jr * ir,
1,
rntm
);
@@ -198,15 +198,15 @@ void bli_rntm_set_ways_from_rntm
pc = 1;
bli_partition_2x2( nt, m*BLIS_DEFAULT_M_THREAD_RATIO,
n*BLIS_DEFAULT_N_THREAD_RATIO, &ic, &jc );
bli_partition_2x2( nt, m*BLIS_THREAD_RATIO_M,
n*BLIS_THREAD_RATIO_N, &ic, &jc );
for ( ir = BLIS_DEFAULT_MR_THREAD_MAX ; ir > 1 ; ir-- )
for ( ir = BLIS_THREAD_MAX_IR ; ir > 1 ; ir-- )
{
if ( ic % ir == 0 ) { ic /= ir; break; }
}
for ( jr = BLIS_DEFAULT_NR_THREAD_MAX ; jr > 1 ; jr-- )
for ( jr = BLIS_THREAD_MAX_JR ; jr > 1 ; jr-- )
{
if ( jc % jr == 0 ) { jc /= jr; break; }
}