mirror of
https://github.com/amd/blis.git
synced 2026-05-12 10:05:38 +00:00
Fine-tuned thresholds and AOCL dynamic threading for zgemm on skinny matrices.
- Updated the optimal thread counts in the zgemm sup path for skinny matrices.
- Fine-tuned the threshold values for the small and sup paths to improve overall zgemm performance.
- The zgemm small path is selected for inputs with transb set to N (no transpose).
- Fine-tuned the redirection of inputs among the small, sup, and native paths.

AMD-Internal: [CPUPL-1900]
Change-Id: Ide37c8255def770b4b74bc6e7c6edb5ee15d3b1f
This commit is contained in:
@@ -231,9 +231,9 @@ void bli_cntx_init_zen( cntx_t* cntx )
|
||||
|
||||
// Initialize sup thresholds with architecture-appropriate values.
|
||||
// s d c z
|
||||
bli_blksz_init_easy( &thresh[ BLIS_MT ], 512, 256, 380, 110 );
|
||||
bli_blksz_init_easy( &thresh[ BLIS_MT ], 512, 256, 380, 128 );
|
||||
bli_blksz_init_easy( &thresh[ BLIS_NT ], 512, 256, 256, 128 );
|
||||
bli_blksz_init_easy( &thresh[ BLIS_KT ], 440, 220, 220, 110 );
|
||||
bli_blksz_init_easy( &thresh[ BLIS_KT ], 440, 220, 220, 128 );
|
||||
|
||||
// Initialize the context with the sup thresholds.
|
||||
bli_cntx_set_l3_sup_thresh
|
||||
|
||||
@@ -624,14 +624,29 @@ void bli_nthreads_optimum(
|
||||
dim_t n = bli_obj_width(c);
|
||||
dim_t k = bli_obj_width_after_trans(a);
|
||||
|
||||
if((m<=128 || n<=128 || k<=128) && ((m+n+k) <= 400) )
|
||||
if((m<=128 || n<=128 || k<=128) && ((m+n+k) <= 400))
|
||||
{
|
||||
n_threads_ideal = 8;
|
||||
}
|
||||
else if((m<=256 || n<=256 || k<=256) && ((m+n+k) <= 800) )
|
||||
else if((m<=256 || n<=256 || k<=256) && ((m+n+k) <= 800))
|
||||
{
|
||||
n_threads_ideal = 16;
|
||||
}
|
||||
if((m<=48) || (n<=48) || (k<=48))
|
||||
{
|
||||
if((m+n+k) <= 840)
|
||||
{
|
||||
n_threads_ideal = 8;
|
||||
}
|
||||
else if((m+n+k) <= 1240)
|
||||
{
|
||||
n_threads_ideal = 16;
|
||||
}
|
||||
else if((m+n+k) <= 1540)
|
||||
{
|
||||
n_threads_ideal = 32;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( family == BLIS_SYRK && bli_obj_is_double(c))
|
||||
{
|
||||
|
||||
@@ -762,7 +762,7 @@ void zgemm_
|
||||
- For single thread, the API has no constraints before invoking.
|
||||
- For multiple threads, the constraint is that m and n should individually be less than 128.
|
||||
*/
|
||||
if((k0==1) && ((nt==0) || ((nt==1) && (m0 < 128) && (n0 < 128)))
|
||||
if((k0 == 1) && ((nt == 0) || ((nt == 1) && (m0 < 128) && (n0 < 128)))
|
||||
&& bli_is_notrans(blis_transa)
|
||||
&& bli_is_notrans(blis_transb))
|
||||
{
|
||||
@@ -853,9 +853,11 @@ void zgemm_
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
|
||||
if (((nt == 0) && (m0 <= 40) && (n0 <= 40) && (k0 <= 512)) ||
|
||||
if (((nt == 0) && (((m0 <= 40) && (n0 <= 40)) ||
|
||||
(m0 <= 128) && (n0 <= 128) && bli_is_notrans(blis_transb)) && (k0 <= 512)) ||
|
||||
((nt == 1) && (((m0 <= 32) || (n0 <= 32) || (k0 <= 32)) && ((m0 + n0 + k0) <= 100))))
|
||||
{
|
||||
err_t status = BLIS_NOT_YET_IMPLEMENTED;
|
||||
|
||||
Reference in New Issue
Block a user