mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Fixed rare edge case bug in herk_l macro-kernel.
Details: - Fixed a potential bug in herk_l at the m_left edge case. If MR was chosen to be much larger than NR, then one could encounter edge cases in the MC dimension that fall entirely below the diagonal, which the previous implementation of the herk_l macro-kernel did not allow for.
This commit is contained in:
@@ -302,27 +302,50 @@ void PASTEMAC(ch,varname)( \
|
||||
c11 += rstep_c; \
|
||||
} \
|
||||
\
|
||||
/* Bottom edge handling. (Note that the diagonal is guaranteed not
|
||||
to factor in here.) */ \
|
||||
/* Bottom edge handling. */ \
|
||||
if ( m_left ) \
|
||||
{ \
|
||||
/* Compute the diagonal offset for the submatrix at (i,j). */ \
|
||||
diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \
|
||||
\
|
||||
/* Compute the address of the next panel of A. */ \
|
||||
a2 = a_cast; \
|
||||
\
|
||||
/* Invoke the gemm micro-kernel. */ \
|
||||
PASTEMAC(ch,ukrname)( k, \
|
||||
alpha_cast, \
|
||||
a1, \
|
||||
bp, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
if ( bli_intersects_diag_n( diagoffc_ij, m_left, NR ) ) \
|
||||
{ \
|
||||
/* Invoke the gemm micro-kernel. */ \
|
||||
PASTEMAC(ch,ukrname)( k, \
|
||||
alpha_cast, \
|
||||
a1, \
|
||||
bp, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Scale the bottom edge of C and add the result. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_left, NR, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
/* Scale C and add the result to only the stored part. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn_l)( diagoffc_ij, \
|
||||
m_left, NR, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
else if ( bli_is_strictly_below_diag_n( diagoffc_ij, m_left, NR ) ) \
|
||||
{ \
|
||||
/* Invoke the gemm micro-kernel. */ \
|
||||
PASTEMAC(ch,ukrname)( k, \
|
||||
alpha_cast, \
|
||||
a1, \
|
||||
bp, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Scale the bottom edge of C and add the result. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_left, NR, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
b1 += cstep_b; \
|
||||
|
||||
Reference in New Issue
Block a user