From db072a5b4a039a9a668ef951333ecfb5bd3a74b9 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Tue, 23 Apr 2013 17:49:10 -0500 Subject: [PATCH] Fixed rare edge case bug in herk_l macro-kernel. Details: - Fixed a potential bug in herk_l at the m_left edge case. If MR was chosen to be much larger than NR, then one could encounter edge cases in the MC dimension that fall entirely below the diagonal, which the previous implementation of the herk_l macro-kernel was not allowing for. --- frame/3/herk/bli_herk_l_ker_var2.c | 53 +++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/frame/3/herk/bli_herk_l_ker_var2.c b/frame/3/herk/bli_herk_l_ker_var2.c index 21a7736fc..1df30ff02 100644 --- a/frame/3/herk/bli_herk_l_ker_var2.c +++ b/frame/3/herk/bli_herk_l_ker_var2.c @@ -302,27 +302,50 @@ void PASTEMAC(ch,varname)( \ c11 += rstep_c; \ } \ \ - /* Bottom edge handling. (Note that the diagonal is guaranteed not - to factor in here.) */ \ + /* Bottom edge handling. */ \ if ( m_left ) \ { \ + /* Compute the diagonal offset for the submatrix at (i,j). */ \ + diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \ +\ /* Compute the address of the next panel of A. */ \ a2 = a_cast; \ \ - /* Invoke the gemm micro-kernel. */ \ - PASTEMAC(ch,ukrname)( k, \ - alpha_cast, \ - a1, \ - bp, \ - zero, \ - ct, rs_ct, cs_ct, \ - a2, b2 ); \ + if ( bli_intersects_diag_n( diagoffc_ij, m_left, NR ) ) \ + { \ + /* Invoke the gemm micro-kernel. */ \ + PASTEMAC(ch,ukrname)( k, \ + alpha_cast, \ + a1, \ + bp, \ + zero, \ + ct, rs_ct, cs_ct, \ + a2, b2 ); \ \ - /* Scale the bottom edge of C and add the result. */ \ - PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_left, NR, \ - ct, rs_ct, cs_ct, \ - beta_cast, \ - c11, rs_c, cs_c ); \ + /* Scale C and add the result to only the stored part. 
*/ \ + PASTEMAC3(ch,ch,ch,xpbys_mxn_l)( diagoffc_ij, \ + m_left, NR, \ + ct, rs_ct, cs_ct, \ + beta_cast, \ + c11, rs_c, cs_c ); \ + } \ + else if ( bli_is_strictly_below_diag_n( diagoffc_ij, m_left, NR ) ) \ + { \ + /* Invoke the gemm micro-kernel. */ \ + PASTEMAC(ch,ukrname)( k, \ + alpha_cast, \ + a1, \ + bp, \ + zero, \ + ct, rs_ct, cs_ct, \ + a2, b2 ); \ +\ + /* Scale the bottom edge of C and add the result. */ \ + PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_left, NR, \ + ct, rs_ct, cs_ct, \ + beta_cast, \ + c11, rs_c, cs_c ); \ + } \ } \ \ b1 += cstep_b; \