gemmt SUP: limit thread count for small sizes

1. Added a maximum thread cap for small dimensions, based on the product n*k.

AMD-Internal: [CPUPL-1388]

Change-Id: I34412a1374bb58a9c4b3fd8e40949a69006cf057
This commit is contained in:
Madan mohan Manokar
2021-09-17 15:32:47 +05:30
committed by Dipal M Zambare
parent 590c763e22
commit d6fcfe7345
2 changed files with 24 additions and 1 deletions

View File

@@ -158,7 +158,6 @@ printf( "dims: %d %d %d (threshs: %d %d %d)\n",
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2);
}
err_t bli_gemmtsup
(
obj_t* alpha,
@@ -243,6 +242,14 @@ err_t bli_gemmtsup
if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; }
else { rntm_l = *rntm; rntm = &rntm_l; }
#ifdef AOCL_DYNAMIC
// If dynamic-threading is enabled, calculate optimum number
// of threads and update in rntm
// Limit the number of threads for smaller sizes.
bli_nthreads_optimum( a, b, c, BLIS_GEMMT, rntm );
#endif
#if 0
const num_t dt = bli_obj_dt( c );
const dim_t m = bli_obj_length( c );

View File

@@ -605,6 +605,22 @@ void bli_nthreads_optimum(
if(m<=512 && n<=512)
n_threads_ideal = 4;
}
else if( family == BLIS_GEMMT && bli_obj_is_double(c) )
{
dim_t n = bli_obj_length(c);
dim_t k = bli_obj_width_after_trans(a);
dim_t product = (n*k)>>4; /* product is derived based on n and k */
// Limit the number of threads for smaller sizes:
if(product <= 346)
{
n_threads_ideal = 1;
}
/* finer thresholds need to be set for max_thread caps of 2,3,4,5,6..32 */
else
{
n_threads_ideal = n_threads;
}
}
dim_t n_threads_opt = bli_min(n_threads, n_threads_ideal);