From d6fcfe734517a1a53fb0fa38d9a650841c9f09b0 Mon Sep 17 00:00:00 2001 From: Madan mohan Manokar Date: Fri, 17 Sep 2021 15:32:47 +0530 Subject: [PATCH] gemmt SUP: limit thread count for small sizes 1. Max thread cap added for small dimensions based on the product (n*k). AMD-Internal: [CPUPL-1388] Change-Id: I34412a1374bb58a9c4b3fd8e40949a69006cf057 --- frame/3/bli_l3_sup.c | 9 ++++++++- frame/base/bli_rntm.c | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/frame/3/bli_l3_sup.c b/frame/3/bli_l3_sup.c index 163a828f8..a7d7a7874 100644 --- a/frame/3/bli_l3_sup.c +++ b/frame/3/bli_l3_sup.c @@ -158,7 +158,6 @@ printf( "dims: %d %d %d (threshs: %d %d %d)\n", AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2); } - err_t bli_gemmtsup ( obj_t* alpha, @@ -243,6 +242,14 @@ err_t bli_gemmtsup if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } else { rntm_l = *rntm; rntm = &rntm_l; } +#ifdef AOCL_DYNAMIC + // If dynamic-threading is enabled, calculate optimum number + // of threads and update in rntm + + // Limit the number of threads for smaller sizes. + bli_nthreads_optimum( a, b, c, BLIS_GEMMT, rntm ); +#endif + #if 0 const num_t dt = bli_obj_dt( c ); const dim_t m = bli_obj_length( c ); diff --git a/frame/base/bli_rntm.c b/frame/base/bli_rntm.c index ba878ac6d..6a100bbe8 100644 --- a/frame/base/bli_rntm.c +++ b/frame/base/bli_rntm.c @@ -605,6 +605,22 @@ void bli_nthreads_optimum( if(m<=512 && n<=512) n_threads_ideal = 4; } + else if( family == BLIS_GEMMT && bli_obj_is_double(c) ) + { + dim_t n = bli_obj_length(c); + dim_t k = bli_obj_width_after_trans(a); + dim_t product = (n*k)>>4; /* product is derived based on n and k */ + // Limit the number of threads for smaller sizes: + if(product <= 346) + { + n_threads_ideal = 1; + } + /* finer thresholds need to be set for max_thread caps of 2,3,4,5,6..32 */ + else + { + n_threads_ideal = n_threads; + } + } dim_t n_threads_opt = bli_min(n_threads, n_threads_ideal);