mirror of
https://github.com/amd/blis.git
synced 2026-05-24 10:24:34 +00:00
Enabled MT path in DTRSM Small
1. Fixed accuracy issues in the multi-threaded DTRSM small path.
2. Fixed out-of-bounds memory accesses in the patch "Fixes to avoid Out of Bound Memory Access in TRSM small algorithm".
3. Re-enabled DTRSM small MT with the above fixes.

AMD-Internal: [CPUPL-2567]
Change-Id: Ibf2949b25fde4007a92cc635526bed0e0d897800
This commit is contained in:
@@ -967,9 +967,41 @@ void dtrsm_blis_impl
|
||||
return;
|
||||
}
|
||||
}
|
||||
// bli_trsm_small_mt is performing better than native multithread
|
||||
// for certain sizes of m & n.
|
||||
#ifdef BLIS_ENABLE_OPENMP
|
||||
rntm_t rntm;
|
||||
bli_rntm_init_from_global( &rntm );
|
||||
// Query the total number of threads from the rntm_t object.
|
||||
dim_t n_threads = bli_rntm_num_threads( &rntm );
|
||||
if ( ( (n_threads > 1) && (m0 <= 1500) && (n0 <= 1500) ) ||
|
||||
( (n_threads == 32) && (m0 <= 2300) && (n0 <= 2300) ) ||
|
||||
( (n_threads == 16) && (m0 <= 3800) && (n0 <= 3800) ) ||
|
||||
( (n_threads == 8) && (m0 <= 2800) && (n0 <= 2800) ) ||
|
||||
( (n_threads == 4) && (m0 <= 2000) && (n0 <= 2000) ) ||
|
||||
( (n_threads == 2) && (m0 <= 2000) && (n0 <= 2000) ) )
|
||||
{
|
||||
err_t status;
|
||||
status = bli_trsm_small_mt(
|
||||
blis_side,
|
||||
&alphao,
|
||||
&ao,
|
||||
&bo,
|
||||
NULL,
|
||||
NULL);
|
||||
if ( status == BLIS_SUCCESS )
|
||||
{
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
|
||||
/* Finalize BLIS. */
|
||||
bli_finalize_auto();
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif// BLIS_ENABLE_OPENMP
|
||||
} // bli_cpuid_is_avx_supported
|
||||
#endif// END of BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
|
||||
bli_trsmnat
|
||||
(
|
||||
blis_side,
|
||||
|
||||
Reference in New Issue
Block a user