Fixed rare edge case bug in herk_l macro-kernel.

Details:
- Fixed a potential bug in herk_l at the m_left edge case. If MR was
  chosen to be much larger than NR, then one could encounter edge cases
  in the MC dimension that fall entirely below the diagonal, which
  the previous implementation of the herk_l macro-kernel did not allow
  for.
This commit is contained in:
Field G. Van Zee
2013-04-23 17:49:10 -05:00
parent 1dab11e37d
commit db072a5b4a

View File

@@ -302,27 +302,50 @@ void PASTEMAC(ch,varname)( \
c11 += rstep_c; \
} \
\
/* Bottom edge handling. (Note that the diagonal is guaranteed not
to factor in here.) */ \
/* Bottom edge handling. */ \
if ( m_left ) \
{ \
/* Compute the diagonal offset for the submatrix at (i,j). */ \
diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \
\
/* Compute the address of the next panel of A. */ \
a2 = a_cast; \
\
/* Invoke the gemm micro-kernel. */ \
PASTEMAC(ch,ukrname)( k, \
alpha_cast, \
a1, \
bp, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
if ( bli_intersects_diag_n( diagoffc_ij, m_left, NR ) ) \
{ \
/* Invoke the gemm micro-kernel. */ \
PASTEMAC(ch,ukrname)( k, \
alpha_cast, \
a1, \
bp, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
\
/* Scale the bottom edge of C and add the result. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_left, NR, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
/* Scale C and add the result to only the stored part. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn_l)( diagoffc_ij, \
m_left, NR, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
} \
else if ( bli_is_strictly_below_diag_n( diagoffc_ij, m_left, NR ) ) \
{ \
/* Invoke the gemm micro-kernel. */ \
PASTEMAC(ch,ukrname)( k, \
alpha_cast, \
a1, \
bp, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
\
/* Scale the bottom edge of C and add the result. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_left, NR, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
} \
} \
\
b1 += cstep_b; \