mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Fix DTRSM small threshold for extremely skinny sizes for ZEN5 (#151)
- The logic that decides whether to take the small code path did not account for matrix A being too large.
- Added a condition so that the native code path is used when matrix A is very large.

AMD-Internal: [CPUPL-7201]
This commit is contained in:
@@ -1183,7 +1183,7 @@ void dtrsm_blis_impl
|
||||
{
|
||||
ker_ft = bli_trsm_small_zen4;
|
||||
}
|
||||
else if ( (log10(n0) + (0.65*log10(m0)) ) < 4.4 )
|
||||
else if ( (log10(n0) + (0.65*log10(m0)) ) < 4.4 && ( m0 < 4500 ) )
|
||||
{
|
||||
ker_ft = bli_trsm_small_zen5;
|
||||
}
|
||||
@@ -1194,7 +1194,7 @@ void dtrsm_blis_impl
|
||||
{
|
||||
ker_ft = bli_trsm_small_zen4;
|
||||
}
|
||||
else if ( (log10(m0) + (0.85*log10(n0)) ) < 5 )
|
||||
else if ( (log10(m0) + (0.85*log10(n0)) ) < 5 && ( n0 < 4500 ))
|
||||
{
|
||||
ker_ft = bli_trsm_small_zen5;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user