mirror of
https://github.com/amd/blis.git
synced 2026-04-20 15:48:50 +00:00
Changing ZGEMM SUP threshold logic for zen5 to fix performance regression (#233)
- Revert the logical operator joining the n <= 1520 and k <= 128 tests from OR (||) to AND (&&) in the DCOMPLEX (ZGEMM) SUP threshold condition. The earlier change to OR caused performance regressions for certain input sizes, because it routed those cases to the SUP path when the native path would have been faster.
This commit is contained in:
@@ -87,7 +87,7 @@ bool bli_cntx_gemmsup_thresh_is_met_zen5( obj_t* a, obj_t* b, obj_t* c, cntx_t*
|
||||
// The threshold for m is a single value, but for n, it is
|
||||
// also based on the packing size of A, since the kernels are
|
||||
// column preferential
|
||||
if( ( ( m <= 1380 ) || ( n <= 1520 ) || ( k <= 128 ) ) && ( m + n + k < 6400 ) ) return TRUE;
|
||||
if ( ( ( m <= 1380 ) || ( n <= 1520 ) && ( k <= 128 ) ) && ( m + n + k < 6400 ) ) return TRUE;
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user