From 9c6777fc6bf1cf7d7036b024968991fcab1b3fb3 Mon Sep 17 00:00:00 2001 From: "Sharma, Shubham" Date: Fri, 22 Aug 2025 22:15:40 +0530 Subject: [PATCH] Fix DTRSM small threshold for extremely skinny sizes for ZEN5 (#151) - The logic that decides whether to take the small code path did not account for matrix A being very large. - Added an upper bound of 4500 on the relevant dimension (m0 in one branch, n0 in the other) so that the native code path is used when matrix A is very large. AMD-Internal: [CPUPL-7201] --- frame/compat/bla_trsm_amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frame/compat/bla_trsm_amd.c b/frame/compat/bla_trsm_amd.c index b9fc46abe..d29497d24 100644 --- a/frame/compat/bla_trsm_amd.c +++ b/frame/compat/bla_trsm_amd.c @@ -1183,7 +1183,7 @@ void dtrsm_blis_impl { ker_ft = bli_trsm_small_zen4; } - else if ( (log10(n0) + (0.65*log10(m0)) ) < 4.4 ) + else if ( (log10(n0) + (0.65*log10(m0)) ) < 4.4 && ( m0 < 4500 ) ) { ker_ft = bli_trsm_small_zen5; } @@ -1194,7 +1194,7 @@ void dtrsm_blis_impl { ker_ft = bli_trsm_small_zen4; } - else if ( (log10(m0) + (0.85*log10(n0)) ) < 5 ) + else if ( (log10(m0) + (0.85*log10(n0)) ) < 5 && ( n0 < 4500 )) { ker_ft = bli_trsm_small_zen5; }