Enabled the multi-threaded (MT) path in DTRSM small

1. Fixed accuracy issues in the DTRSM small multi-threaded path.
2. Fixed out-of-bounds memory accesses in the patch
   "Fixes to avoid Out of Bound Memory Access in TRSM small algorithm".
3. Re-enabled DTRSM small MT with the above fixes.

AMD-Internal: [CPUPL-2567]
Change-Id: Ibf2949b25fde4007a92cc635526bed0e0d897800
This commit is contained in:
Mangala V
2022-10-07 07:29:09 -04:00
committed by Mangala V
parent 492555785a
commit bdbdd20996

View File

@@ -967,9 +967,41 @@ void dtrsm_blis_impl
return;
}
}
// bli_trsm_small_mt performs better than the native multithreaded
// path for certain sizes of m and n.
#ifdef BLIS_ENABLE_OPENMP
rntm_t rntm;
bli_rntm_init_from_global( &rntm );
// Query the total number of threads from the rntm_t object.
dim_t n_threads = bli_rntm_num_threads( &rntm );
if ( ( (n_threads > 1) && (m0 <= 1500) && (n0 <= 1500) ) ||
( (n_threads == 32) && (m0 <= 2300) && (n0 <= 2300) ) ||
( (n_threads == 16) && (m0 <= 3800) && (n0 <= 3800) ) ||
( (n_threads == 8) && (m0 <= 2800) && (n0 <= 2800) ) ||
( (n_threads == 4) && (m0 <= 2000) && (n0 <= 2000) ) ||
( (n_threads == 2) && (m0 <= 2000) && (n0 <= 2000) ) )
{
err_t status;
status = bli_trsm_small_mt(
blis_side,
&alphao,
&ao,
&bo,
NULL,
NULL);
if ( status == BLIS_SUCCESS )
{
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
/* Finalize BLIS. */
bli_finalize_auto();
return;
}
}
#endif// BLIS_ENABLE_OPENMP
} // bli_cpuid_is_avx_supported
#endif// END of BLIS_ENABLE_SMALL_MATRIX_TRSM
bli_trsmnat
(
blis_side,