mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
BLIS-Nov2022: HPL memory issues with GCC.
The HPL script was using the BLIS manual method of setting threading, i.e. setting BLIS_IC_NT explicitly. This causes bli_rntm_num_threads() to return -1, which wasn't trapped in the parallelised BLAS1 and BLAS2 routines. Fix: if this occurs, set the local number of threads to the product of the BLIS_JC_NT * BLIS_PC_NT * BLIS_IC_NT * BLIS_JR_NT * BLIS_IR_NT values. Note: BLIS_PC_NT should always be 1, but this environment variable is currently being read (contrary to the documentation), so include it for now. Other changes: * Implement the _Pragma convention in all code used on AMD. * frame/2/gemv/bli_gemv_unf_var1_amd.c: Remove the is_omp_mt_enabled flag. AMD-Internal: [CPUPL-2803] Change-Id: I37e8b038e5640d6693a87be0609888186322b465
This commit is contained in:
@@ -501,13 +501,6 @@ void bli_sgemv_unf_var1
|
||||
return;
|
||||
}
|
||||
|
||||
// If both multithreading and OpenMP are enabled, GEMV will multithread
|
||||
#if defined(BLIS_ENABLE_MULTITHREADING) && defined(BLIS_ENABLE_OPENMP)
|
||||
bool is_omp_mt_enabled = TRUE;
|
||||
#else
|
||||
bool is_omp_mt_enabled = FALSE;
|
||||
#endif
|
||||
|
||||
dim_t nt_max;
|
||||
|
||||
rntm_t rnmt_obj;
|
||||
@@ -517,9 +510,23 @@ void bli_sgemv_unf_var1
|
||||
// Query the total number of threads from the rntm_t object.
|
||||
nt_max = bli_rntm_num_threads( &rnmt_obj );
|
||||
|
||||
if ( ( nt_max > 1 ) & ( is_omp_mt_enabled == TRUE ) )
|
||||
if (nt_max<=0)
|
||||
{
|
||||
// nt is less than one if BLIS manual setting of parallelism
|
||||
// has been used. Parallelism here will be product of values.
|
||||
dim_t jc, pc, ic, jr, ir;
|
||||
jc = bli_rntm_jc_ways( &rnmt_obj );
|
||||
pc = bli_rntm_pc_ways( &rnmt_obj );
|
||||
ic = bli_rntm_ic_ways( &rnmt_obj );
|
||||
jr = bli_rntm_jr_ways( &rnmt_obj );
|
||||
ir = bli_rntm_ir_ways( &rnmt_obj );
|
||||
nt_max = jc*pc*ic*jr*ir;
|
||||
}
|
||||
|
||||
// If OpenMP is enabled, GEMV will multithread
|
||||
#ifdef BLIS_ENABLE_OPENMP
|
||||
if ( nt_max > 1 )
|
||||
{
|
||||
b_fuse = 4;
|
||||
|
||||
//Setting the thread count to the maximum number of threads provided
|
||||
@@ -545,10 +552,10 @@ void bli_sgemv_unf_var1
|
||||
cntx,
|
||||
nt
|
||||
);
|
||||
#endif// BLIS_ENABLE_OPENMP
|
||||
}
|
||||
else
|
||||
{
|
||||
#endif// BLIS_ENABLE_OPENMP
|
||||
b_fuse = 8;
|
||||
|
||||
for ( i = 0; i < n_iter; i += f )
|
||||
@@ -575,7 +582,9 @@ void bli_sgemv_unf_var1
|
||||
cntx
|
||||
);
|
||||
}
|
||||
#ifdef BLIS_ENABLE_OPENMP
|
||||
}
|
||||
#endif// BLIS_ENABLE_OPENMP
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0_CZ( gemv_unf_var1 )
|
||||
|
||||
@@ -307,6 +307,20 @@ double ddot_blis_impl
|
||||
|
||||
// Query the total number of threads from the rntm_t object.
|
||||
nt = bli_rntm_num_threads(&rntm);
|
||||
|
||||
if (nt<=0)
|
||||
{
|
||||
// nt is less than one if BLIS manual setting of parallelism
|
||||
// has been used. Parallelism here will be product of values.
|
||||
dim_t jc, pc, ic, jr, ir;
|
||||
jc = bli_rntm_jc_ways( &rntm );
|
||||
pc = bli_rntm_pc_ways( &rntm );
|
||||
ic = bli_rntm_ic_ways( &rntm );
|
||||
jr = bli_rntm_jr_ways( &rntm );
|
||||
ir = bli_rntm_ir_ways( &rntm );
|
||||
nt = jc*pc*ic*jr*ir;
|
||||
}
|
||||
|
||||
mem_t local_mem_buf = { 0 };
|
||||
|
||||
bli_membrk_rntm_set_membrk(&rntm);
|
||||
@@ -348,7 +362,7 @@ double ddot_blis_impl
|
||||
n0_per_thread = n0 / nt;
|
||||
n0_rem = n0 % nt;
|
||||
|
||||
#pragma omp parallel num_threads(nt)
|
||||
_Pragma( "omp parallel num_threads(nt)" )
|
||||
{
|
||||
// Getting the actual number of threads that are spawned.
|
||||
dim_t nt_real = omp_get_num_threads();
|
||||
|
||||
@@ -268,6 +268,19 @@ void dscal_blis_impl
|
||||
bli_rntm_init_from_global( &rntm_local );
|
||||
dim_t nt = bli_rntm_num_threads( &rntm_local );
|
||||
|
||||
if (nt<=0)
|
||||
{
|
||||
// nt is less than one if BLIS manual setting of parallelism
|
||||
// has been used. Parallelism here will be product of values.
|
||||
dim_t jc, pc, ic, jr, ir;
|
||||
jc = bli_rntm_jc_ways( &rntm_local );
|
||||
pc = bli_rntm_pc_ways( &rntm_local );
|
||||
ic = bli_rntm_ic_ways( &rntm_local );
|
||||
jr = bli_rntm_jr_ways( &rntm_local );
|
||||
ir = bli_rntm_ir_ways( &rntm_local );
|
||||
nt = jc*pc*ic*jr*ir;
|
||||
}
|
||||
|
||||
#ifdef AOCL_DYNAMIC
|
||||
dim_t nt_ideal;
|
||||
|
||||
@@ -281,7 +294,7 @@ void dscal_blis_impl
|
||||
dim_t n_elem_per_thrd = n0 / nt;
|
||||
dim_t n_elem_rem = n0 % nt;
|
||||
|
||||
#pragma omp parallel num_threads( nt )
|
||||
_Pragma( "omp parallel num_threads(nt)" )
|
||||
{
|
||||
// Getting the actual number of threads that are spawned.
|
||||
dim_t nt_real = omp_get_num_threads();
|
||||
@@ -457,6 +470,19 @@ void zdscal_blis_impl
|
||||
bli_rntm_init_from_global( &rntm_local );
|
||||
dim_t nt = bli_rntm_num_threads( &rntm_local );
|
||||
|
||||
if (nt<=0)
|
||||
{
|
||||
// nt is less than one if BLIS manual setting of parallelism
|
||||
// has been used. Parallelism here will be product of values.
|
||||
dim_t jc, pc, ic, jr, ir;
|
||||
jc = bli_rntm_jc_ways( &rntm_local );
|
||||
pc = bli_rntm_pc_ways( &rntm_local );
|
||||
ic = bli_rntm_ic_ways( &rntm_local );
|
||||
jr = bli_rntm_jr_ways( &rntm_local );
|
||||
ir = bli_rntm_ir_ways( &rntm_local );
|
||||
nt = jc*pc*ic*jr*ir;
|
||||
}
|
||||
|
||||
#ifdef AOCL_DYNAMIC
|
||||
dim_t nt_ideal;
|
||||
|
||||
@@ -471,7 +497,7 @@ void zdscal_blis_impl
|
||||
dim_t n_elem_per_thread = n0 / nt;
|
||||
dim_t n_elem_rem = n0 % nt;
|
||||
|
||||
#pragma omp parallel num_threads( nt )
|
||||
_Pragma( "omp parallel num_threads(nt)" )
|
||||
{
|
||||
// Getting the actual number of threads that are spawned.
|
||||
dim_t nt_real = omp_get_num_threads();
|
||||
|
||||
@@ -565,7 +565,7 @@ void bli_multi_sgemv_4x2
|
||||
// Calculate the total number of multithreaded iteration
|
||||
total_iteration = b_n / b_fuse;
|
||||
|
||||
#pragma omp parallel for num_threads(n_threads)
|
||||
_Pragma( "omp parallel for num_threads(n_threads)" )
|
||||
for (dim_t j = 0; j < total_iteration; j++)
|
||||
{
|
||||
float *A1 = a + (b_fuse * j) * lda;
|
||||
|
||||
Reference in New Issue
Block a user