From bdbdd209967ab721a9ecd86736ac0cdb1e5ea8e7 Mon Sep 17 00:00:00 2001 From: Mangala V Date: Fri, 7 Oct 2022 07:29:09 -0400 Subject: [PATCH] Enabled MT path in DTRSM Small 1. Fixed accuracy issues in the DTRSM small multi-threaded path. 2. Fixed out-of-bounds memory accesses in the patch "Fixes to avoid Out of Bound Memory Access in TRSM small algorithm". 3. Re-enabled DTRSM small MT with the above fixes. AMD-Internal: [CPUPL-2567] Change-Id: Ibf2949b25fde4007a92cc635526bed0e0d897800 --- frame/compat/bla_trsm_amd.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/frame/compat/bla_trsm_amd.c b/frame/compat/bla_trsm_amd.c index 1fc8899ce..9e4d35dfd 100644 --- a/frame/compat/bla_trsm_amd.c +++ b/frame/compat/bla_trsm_amd.c @@ -967,9 +967,41 @@ void dtrsm_blis_impl return; } } + // bli_trsm_small_mt is performing better than native multithread + // for certain sizes of m & n. +#ifdef BLIS_ENABLE_OPENMP + rntm_t rntm; + bli_rntm_init_from_global( &rntm ); + // Query the total number of threads from the rntm_t object. + dim_t n_threads = bli_rntm_num_threads( &rntm ); + if ( ( (n_threads > 1) && (m0 <= 1500) && (n0 <= 1500) ) || + ( (n_threads == 32) && (m0 <= 2300) && (n0 <= 2300) ) || + ( (n_threads == 16) && (m0 <= 3800) && (n0 <= 3800) ) || + ( (n_threads == 8) && (m0 <= 2800) && (n0 <= 2800) ) || + ( (n_threads == 4) && (m0 <= 2000) && (n0 <= 2000) ) || + ( (n_threads == 2) && (m0 <= 2000) && (n0 <= 2000) ) ) + { + err_t status; + status = bli_trsm_small_mt( + blis_side, + &alphao, + &ao, + &bo, + NULL, + NULL); + if ( status == BLIS_SUCCESS ) + { + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO); + /* Finalize BLIS. */ + bli_finalize_auto(); + return; + } + } +#endif// BLIS_ENABLE_OPENMP } // bli_cpuid_is_avx_supported #endif// END of BLIS_ENABLE_SMALL_MATRIX_TRSM + bli_trsmnat ( blis_side,