mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Added aocl dynamic feature for dtrsm for small sizes
Details: 1. Added aocl-dynamic for the dtrsm native path: when m <= 512 and n <= 512, better performance is observed with nthreads=4. 2. Updated the trsm_small threshold: when (m+n) < 320, trsm_small performs better than the native path irrespective of the number of threads. Change-Id: Ic2c50f14db257a05e323cc97c5d1c9b73b68f487
This commit is contained in:
@@ -126,6 +126,11 @@ void bli_trsm_front
|
||||
bli_obj_set_as_root( &b_local );
|
||||
bli_obj_set_as_root( &c_local );
|
||||
|
||||
#ifdef AOCL_DYNAMIC
|
||||
// If dynamic-threading is enabled, calculate optimum number
|
||||
// of threads and update in rntm
|
||||
bli_nthreads_optimum(a, b, b, BLIS_TRSM, rntm );
|
||||
#endif
|
||||
// Parse and interpret the contents of the rntm_t object to properly
|
||||
// set the ways of parallelism for each loop, and then make any
|
||||
// additional modifications necessary for the current operation.
|
||||
|
||||
@@ -556,6 +556,16 @@ void bli_nthreads_optimum(
|
||||
else
|
||||
n_threads_ideal = n_threads;
|
||||
}
|
||||
else if( family == BLIS_TRSM && bli_obj_is_double(c))
|
||||
{
|
||||
dim_t m = bli_obj_length(c);
|
||||
dim_t n = bli_obj_width(c);
|
||||
|
||||
if(m<=512 && n<=512)
|
||||
n_threads_ideal = 4;
|
||||
|
||||
}
|
||||
|
||||
dim_t n_threads_opt = bli_min(n_threads, n_threads_ideal);
|
||||
|
||||
bli_pthread_mutex_lock( &global_rntm_mutex );
|
||||
|
||||
@@ -602,7 +602,7 @@ void dtrsm_
|
||||
* is doing better than native multithread */
|
||||
bool nt = bli_thread_get_is_parallel();
|
||||
if((nt==0 && m0<=1000 && n0<=1000) ||
|
||||
(nt && m0<=128 && n0<=128 ) )
|
||||
(nt && (m0+n0)<320) )
|
||||
{
|
||||
err_t status;
|
||||
status = bli_trsm_small
|
||||
|
||||
Reference in New Issue
Block a user