mirror of
https://github.com/amd/blis.git
synced 2026-04-20 07:38:53 +00:00
Bugfix: Tuned zgemm threshold for zen4 (#129)
* Bugfix: Tuned zgemm threshold for zen4
  Tuned the threshold that determines whether the SUP path or the native path is used for a given input matrix size. This tuning forces skinny matrices to take the SUP path to ensure better performance.
* Bugfix: Tuned zgemm threshold for zen4 and zen5
  Tuned the threshold that determines whether the SUP path or the native path is used for a given input matrix size. This tuning forces skinny matrices to take the SUP path to ensure better performance.
---------
Co-authored-by: harsdave <harsdave@amd.com>
This commit is contained in:
@@ -87,7 +87,7 @@ bool bli_cntx_gemmsup_thresh_is_met_zen4( obj_t* a, obj_t* b, obj_t* c, cntx_t*
|
||||
// The threshold for m is a single value, but for n, it is
|
||||
// also based on the packing size of A, since the kernels are
|
||||
// column preferential
|
||||
if( ( m <= 1380 ) || ( n <= 1520 ) || ( k <= 128 ) ) return TRUE;
|
||||
if( ( ( ( m <= 3400 ) || ( n <= 1800 ) ) && ( k <= 128 ) ) && ( m + n + k < 6400 ) ) return TRUE;
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
@@ -87,7 +87,7 @@ bool bli_cntx_gemmsup_thresh_is_met_zen5( obj_t* a, obj_t* b, obj_t* c, cntx_t*
|
||||
// The threshold for m is a single value, but for n, it is
|
||||
// also based on the packing size of A, since the kernels are
|
||||
// column preferential
|
||||
if( ( m <= 1380 ) || ( n <= 1520 ) || ( k <= 128 ) ) return TRUE;
|
||||
if( ( ( ( m <= 3400 ) || ( n <= 1800 ) ) && ( k <= 128 ) ) && ( m + n + k < 6400 ) ) return TRUE;
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user