mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Fixed minor perf bug in gemm_ker_var2.
Details: - Fixed a minor performance bug in bli_gemm_ker_var2.c (and the experimental bli_gemm_ker_var5.c) whereby the addresses for a_next and b_next were not computed correctly (i.e., they did not wrap around) at the edge cases. Thanks to Tze Meng for helping me identify this bug.
This commit is contained in:
@@ -259,11 +259,11 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = a1 + rstep_a; \
|
||||
if ( i == m_iter - 1 && m_left == 0 ) \
|
||||
if ( i == m_iter - 1 ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
b2 = b1 + cstep_b; \
|
||||
if ( j == n_iter - 1 && n_left == 0 ) \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
|
||||
@@ -256,7 +256,7 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = a1 + rstep_a; \
|
||||
if ( i == m_iter - 1 && m_left == 0 ) \
|
||||
if ( i == m_iter - 1 ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
} \
|
||||
|
||||
Reference in New Issue
Block a user