From 75f72b7f6ea87ffafce315e456d5d9cecd4e07bf Mon Sep 17 00:00:00 2001 From: Nallani Bhaskar Date: Thu, 17 Jun 2021 23:53:12 +0530 Subject: [PATCH] Added aocl dynamic feature for dtrsm for small sizes Details: 1. Added aocl-dynamic for the dtrsm native path. When m<=512 and n<=512, better performance is observed with nthreads=4, so the ideal thread count is set to 4 for these sizes (the count actually used is still capped at the requested number of threads). 2. Updated the trsm_small threshold for the multithreaded case: when (m+n)<320, trsm_small performs better than the native path irrespective of the number of threads. Change-Id: Ic2c50f14db257a05e323cc97c5d1c9b73b68f487 --- frame/3/trsm/bli_trsm_front.c | 5 +++++ frame/base/bli_rntm.c | 10 ++++++++++ frame/compat/bla_trsm.c | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/frame/3/trsm/bli_trsm_front.c b/frame/3/trsm/bli_trsm_front.c index 0c3bb11d2..f964faf0d 100644 --- a/frame/3/trsm/bli_trsm_front.c +++ b/frame/3/trsm/bli_trsm_front.c @@ -126,6 +126,11 @@ void bli_trsm_front bli_obj_set_as_root( &b_local ); bli_obj_set_as_root( &c_local ); +#ifdef AOCL_DYNAMIC + // If dynamic-threading is enabled, calculate optimum number + // of threads and update in rntm + bli_nthreads_optimum(a, b, b, BLIS_TRSM, rntm ); +#endif // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. 
diff --git a/frame/base/bli_rntm.c b/frame/base/bli_rntm.c index 09cd45ad1..419c65dcb 100644 --- a/frame/base/bli_rntm.c +++ b/frame/base/bli_rntm.c @@ -556,6 +556,16 @@ void bli_nthreads_optimum( else n_threads_ideal = n_threads; } + else if( family == BLIS_TRSM && bli_obj_is_double(c)) + { + dim_t m = bli_obj_length(c); + dim_t n = bli_obj_width(c); + + if(m<=512 && n<=512) + n_threads_ideal = 4; + + } + dim_t n_threads_opt = bli_min(n_threads, n_threads_ideal); bli_pthread_mutex_lock( &global_rntm_mutex ); diff --git a/frame/compat/bla_trsm.c b/frame/compat/bla_trsm.c index 5a839485b..95ae079cc 100644 --- a/frame/compat/bla_trsm.c +++ b/frame/compat/bla_trsm.c @@ -602,7 +602,7 @@ void dtrsm_ * is doing better than native multithread */ bool nt = bli_thread_get_is_parallel(); if((nt==0 && m0<=1000 && n0<=1000) || - (nt && m0<=128 && n0<=128 ) ) + (nt && (m0+n0)<320) ) { err_t status; status = bli_trsm_small