From a6990434173b0cf651f8521194f3aef738deb7d2 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Thu, 18 Apr 2013 13:52:47 -0500 Subject: [PATCH] Fixed bug in packing block of A for hemm/symm. Details: - Fixed a bug in bli_packm_blk_var2() that affected the packing functionality of hemm and symm. The bug occurs whenever attempting to pack a Hermitian or symmetric matrix where the block of A being packed intersects the diagonal, but some of its micro-panels do not intersect the diagonal and lie completely in the unstored region. Thanks to Francisco Igual for reporting this bug. - Comment updates to both _blk_var2.c and _blk_var3.c. --- frame/1m/packm/bli_packm_blk_var2.c | 31 +++++++++++++++++++++++++---- frame/1m/packm/bli_packm_blk_var3.c | 8 -------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/frame/1m/packm/bli_packm_blk_var2.c b/frame/1m/packm/bli_packm_blk_var2.c index 54ab0a200..7468192fa 100644 --- a/frame/1m/packm/bli_packm_blk_var2.c +++ b/frame/1m/packm/bli_packm_blk_var2.c @@ -352,13 +352,36 @@ void PASTEMAC(ch,varname )( \ } \ else \ { \ + /* We use some c10-specific variables here because we might need + to change them if the current panel is unstored. (The values + below are used if the current panel is stored.) */ \ + c10 = c_begin; \ + incc10 = incc; \ + ldc10 = ldc; \ + conjc10 = conjc; \ +\ + /* If the current panel is unstored, we need to make a few + adjustments so we refer to the data where it is actually + stored, and so we take conjugation into account. (Note + this implicitly assumes we are operating on a symmetric or + Hermitian matrix.) */ \ + if ( bli_is_unstored_subpart_n( diagoffc_i, uploc, panel_dim_i, panel_len ) ) \ + { \ + c10 = c10 + diagoffc_i * ( doff_t )cs_c + \ + -diagoffc_i * ( doff_t )rs_c; \ + bli_swap_incs( incc10, ldc10 ); \ +\ + if ( bli_is_hermitian( strucc ) ) \ + bli_toggle_conj( conjc10 ); \ + } \ +\ /* Pack the current panel. */ \ - PASTEMAC(ch,packm_cxk)( conjc, \ + PASTEMAC(ch,packm_cxk)( conjc10, \ panel_dim_i, \ panel_len, \ beta_cast, \ - c_begin, incc, ldc, \ - p_begin, ldp ); \ + c10, incc10, ldc10, \ + p_begin, ldp ); \ \ /* PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: c", panel_len, panel_dim_i, \ @@ -410,7 +433,7 @@ void PASTEMAC(ch,varname )( \ /* if ( rs_p == 1 ) \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: a copied", m_panel_max, n_panel_max, \ - p_begin, 1, panel_dim, "%5.2f", "" ); \ + p_begin, 1, panel_dim, "%4.1f", "" ); \ if ( cs_p == 1 ) \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: b copied", m_panel_max, n_panel_max, \ p_begin, panel_dim, 1, "%6.3f", "" ); \ diff --git a/frame/1m/packm/bli_packm_blk_var3.c b/frame/1m/packm/bli_packm_blk_var3.c index 608379ad2..3a81fa139 100644 --- a/frame/1m/packm/bli_packm_blk_var3.c +++ b/frame/1m/packm/bli_packm_blk_var3.c @@ -429,11 +429,6 @@ void PASTEMAC(ch,varname )( \ inc_t rs_pe = 1; \ inc_t cs_pe = panel_dim; \ ctype* p_edge = p_begin + (i )*rs_pe + (j )*cs_pe; \ -\ -/* - PASTEMAC(ch,fprintm)( stdout, "packm_var3: p setting br unit diag", m_br, n_br, \ - p_edge, rs_pe, cs_pe, "%5.2f", "" ); \ -*/ \ \ PASTEMAC2(ch,ch,setd_unb_var1)( 0, \ m_br, \ @@ -454,9 +449,6 @@ void PASTEMAC(ch,varname )( \ \ p_begin += p_inc; \ } \ -\ - /*PASTEMAC(ch,fprintm)( stdout, "p copied", panel_dim, 24, \ - p_cast, rs_p, cs_p, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC( packm, packm_blk_var3 )