mirror of
https://github.com/amd/blis.git
synced 2026-05-05 06:51:11 +00:00
Merge branch 'master' into win-pthreads
This commit is contained in:
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -55,11 +56,9 @@ double bli_clock_min_diff( double time_min, double time_start )
|
||||
|
||||
// Assume that anything:
|
||||
// - under or equal to zero,
|
||||
// - over an hour, or
|
||||
// - under a nanosecond
|
||||
// is actually garbled due to the clocks being taken too closely together.
|
||||
if ( time_min <= 0.0 ) time_min = time_min_prev;
|
||||
else if ( time_min > 3600.0 ) time_min = time_min_prev;
|
||||
else if ( time_min < 1.0e-9 ) time_min = time_min_prev;
|
||||
|
||||
return time_min;
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -94,6 +95,60 @@ gint_t bli_info_get_enable_packbuf_pools( void )
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
// Reports whether any multithreading backend is enabled: returns 1 if
// either the OpenMP query or the pthreads query returns nonzero, and 0
// otherwise.
gint_t bli_info_get_enable_threading( void )
{
	// The pthreads query is only evaluated when the OpenMP query returns 0.
	return ( bli_info_get_enable_openmp() ||
	         bli_info_get_enable_pthreads() ) ? 1 : 0;
}
|
||||
// Returns 1 if BLIS_ENABLE_OPENMP was defined when this file was compiled,
// and 0 otherwise.
gint_t bli_info_get_enable_openmp( void )
{
	gint_t enabled = 0;

#ifdef BLIS_ENABLE_OPENMP
	enabled = 1;
#endif

	return enabled;
}
|
||||
// Returns 1 if BLIS_ENABLE_PTHREADS was defined when this file was compiled,
// and 0 otherwise.
gint_t bli_info_get_enable_pthreads( void )
{
	gint_t enabled = 0;

#ifdef BLIS_ENABLE_PTHREADS
	enabled = 1;
#endif

	return enabled;
}
|
||||
// Returns 1 if BLIS_ENABLE_JRIR_SLAB was defined when this file was
// compiled, and 0 otherwise.
gint_t bli_info_get_thread_part_jrir_slab( void )
{
	gint_t enabled = 0;

#ifdef BLIS_ENABLE_JRIR_SLAB
	enabled = 1;
#endif

	return enabled;
}
|
||||
// Returns 1 if BLIS_ENABLE_JRIR_RR was defined when this file was compiled,
// and 0 otherwise.
gint_t bli_info_get_thread_part_jrir_rr( void )
{
	gint_t enabled = 0;

#ifdef BLIS_ENABLE_JRIR_RR
	enabled = 1;
#endif

	return enabled;
}
|
||||
// Returns 1 if BLIS_ENABLE_MEMKIND was defined when this file was compiled,
// and 0 otherwise.
gint_t bli_info_get_enable_memkind( void )
{
	gint_t enabled = 0;

#ifdef BLIS_ENABLE_MEMKIND
	enabled = 1;
#endif

	return enabled;
}
|
||||
// Returns 1 if BLIS_ENABLE_SANDBOX was defined when this file was compiled,
// and 0 otherwise.
gint_t bli_info_get_enable_sandbox( void )
{
	gint_t enabled = 0;

#ifdef BLIS_ENABLE_SANDBOX
	enabled = 1;
#endif

	return enabled;
}
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -58,6 +59,13 @@ gint_t bli_info_get_enable_blas( void );
|
||||
gint_t bli_info_get_enable_cblas( void );
|
||||
gint_t bli_info_get_blas_int_type_size( void );
|
||||
gint_t bli_info_get_enable_packbuf_pools( void );
|
||||
gint_t bli_info_get_enable_threading( void );
|
||||
gint_t bli_info_get_enable_openmp( void );
|
||||
gint_t bli_info_get_enable_pthreads( void );
|
||||
gint_t bli_info_get_thread_part_jrir_slab( void );
|
||||
gint_t bli_info_get_thread_part_jrir_rr( void );
|
||||
gint_t bli_info_get_enable_memkind( void );
|
||||
gint_t bli_info_get_enable_sandbox( void );
|
||||
|
||||
|
||||
// -- Kernel implementation-related --------------------------------------------
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -45,7 +46,7 @@ void bli_prune_unref_mparts( obj_t* p, mdim_t mdim_p,
|
||||
// matrix is empty. This is not strictly needed but rather a minor
|
||||
// optimization, as it would prevent threads that would otherwise get
|
||||
// subproblems on BLIS_ZEROS operands from calling the macro-kernel,
|
||||
// because bli_thread_get_range*() would return empty ranges, which would
|
||||
// because bli_thread_range*() would return empty ranges, which would
|
||||
// prevent the variant's for loop from executing any iterations.
|
||||
// NOTE: this should only ever execute if the primary object is
|
||||
// triangular because that is the only structure type with subpartitions
|
||||
|
||||
@@ -101,16 +101,16 @@ bli_rntm_print( rntm );
|
||||
}
|
||||
else if ( l3_op == BLIS_TRSM )
|
||||
{
|
||||
// For trsm_l, we extract all parallelism from the jr loop, and
|
||||
// for trsm_r, we extract all parallelism from the ic loop.
|
||||
// For trsm_l, we extract all parallelism from the jc and jr loops.
|
||||
// For trsm_r, we extract all parallelism from the ic loop.
|
||||
if ( bli_is_left( side ) )
|
||||
{
|
||||
bli_rntm_set_ways_only
|
||||
(
|
||||
jc,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
ic * pc * jc * jr * ir,
|
||||
ic * pc * jr * ir,
|
||||
1,
|
||||
rntm
|
||||
);
|
||||
@@ -198,15 +198,15 @@ void bli_rntm_set_ways_from_rntm
|
||||
|
||||
pc = 1;
|
||||
|
||||
bli_partition_2x2( nt, m*BLIS_DEFAULT_M_THREAD_RATIO,
|
||||
n*BLIS_DEFAULT_N_THREAD_RATIO, &ic, &jc );
|
||||
bli_partition_2x2( nt, m*BLIS_THREAD_RATIO_M,
|
||||
n*BLIS_THREAD_RATIO_N, &ic, &jc );
|
||||
|
||||
for ( ir = BLIS_DEFAULT_MR_THREAD_MAX ; ir > 1 ; ir-- )
|
||||
for ( ir = BLIS_THREAD_MAX_IR ; ir > 1 ; ir-- )
|
||||
{
|
||||
if ( ic % ir == 0 ) { ic /= ir; break; }
|
||||
}
|
||||
|
||||
for ( jr = BLIS_DEFAULT_NR_THREAD_MAX ; jr > 1 ; jr-- )
|
||||
for ( jr = BLIS_THREAD_MAX_JR ; jr > 1 ; jr-- )
|
||||
{
|
||||
if ( jc % jr == 0 ) { jc /= jr; break; }
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user