From c255a293e25b2223c88e8800267cd06ad2a90041 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Mon, 10 Feb 2014 14:31:24 -0600 Subject: [PATCH] Consolidated packm_blk_var2 and var3. Details: - Consolidated the functionality previously supported by packm_blk_var2() and packm_blk_var3() into a new variant, packm_blk_var1(). - Updates to packm_gen_cxk(), packm_herm_cxk(), and packm_tri_cxk() to accommodate the above changes. - Removed packm_blk_var3() and retired packm_blk_var2() to frame/1m/packm/old. - Updated all level-3 _cntl_init() functions so that the new, more versatile packm_blk_var1 is used for all level-3 matrix packing. --- frame/1m/packm/bli_packm.h | 4 +- ..._packm_blk_var3.c => bli_packm_blk_var1.c} | 166 ++++++---- ..._packm_blk_var3.h => bli_packm_blk_var1.h} | 4 +- frame/1m/packm/bli_packm_gen_cxk.c | 52 ++-- frame/1m/packm/bli_packm_gen_cxk.h | 22 +- frame/1m/packm/bli_packm_herm_cxk.c | 286 ++++++++++-------- frame/1m/packm/bli_packm_herm_cxk.h | 22 +- frame/1m/packm/bli_packm_int.c | 6 +- frame/1m/packm/bli_packm_tri_cxk.c | 55 ++-- frame/1m/packm/bli_packm_tri_cxk.h | 26 +- frame/1m/packm/{ => old}/bli_packm_blk_var2.c | 12 +- frame/1m/packm/{ => old}/bli_packm_blk_var2.h | 0 frame/3/gemm/bli_gemm_cntl.c | 4 +- frame/3/herk/bli_herk_cntl.c | 4 +- frame/3/trmm/bli_trmm_cntl.c | 8 +- frame/3/trsm/bli_trsm_cntl.c | 8 +- frame/include/bli_param_macro_defs.h | 6 + testsuite/src/test_gemmtrsm_ukr.c | 4 +- testsuite/src/test_trsm_ukr.c | 2 +- 19 files changed, 369 insertions(+), 322 deletions(-) rename frame/1m/packm/{bli_packm_blk_var3.c => bli_packm_blk_var1.c} (72%) rename frame/1m/packm/{bli_packm_blk_var3.h => bli_packm_blk_var1.h} (97%) rename frame/1m/packm/{ => old}/bli_packm_blk_var2.c (96%) rename frame/1m/packm/{ => old}/bli_packm_blk_var2.h (100%) diff --git a/frame/1m/packm/bli_packm.h b/frame/1m/packm/bli_packm.h index 4dbe7328f..841072a28 100644 --- a/frame/1m/packm/bli_packm.h +++ b/frame/1m/packm/bli_packm.h @@ -41,9 +41,7 @@ 
#include "bli_packm_unb_var1.h" -#include "bli_packm_blk_var2.h" - -#include "bli_packm_blk_var3.h" +#include "bli_packm_blk_var1.h" #include "bli_packm_cxk.h" #include "bli_packm_gen_cxk.h" diff --git a/frame/1m/packm/bli_packm_blk_var3.c b/frame/1m/packm/bli_packm_blk_var1.c similarity index 72% rename from frame/1m/packm/bli_packm_blk_var3.c rename to frame/1m/packm/bli_packm_blk_var1.c index 4a3eb8225..f74dc6ccd 100644 --- a/frame/1m/packm/bli_packm_blk_var3.c +++ b/frame/1m/packm/bli_packm_blk_var1.c @@ -55,10 +55,10 @@ typedef void (*FUNCPTR_T)( dim_t pd_p, inc_t ps_p ); -static FUNCPTR_T GENARRAY(ftypes,packm_blk_var3); +static FUNCPTR_T GENARRAY(ftypes,packm_blk_var1); -void bli_packm_blk_var3( obj_t* c, +void bli_packm_blk_var1( obj_t* c, obj_t* p ) { num_t dt_cp = bli_obj_datatype( *c ); @@ -154,33 +154,35 @@ void PASTEMAC(ch,varname )( \ dim_t it, ic, ip; \ dim_t ic0, ip0; \ doff_t ic_inc, ip_inc; \ - dim_t panel_dim_max; \ - dim_t panel_len; \ - dim_t panel_len_max; \ doff_t diagoffc_i; \ doff_t diagoffc_inc; \ - dim_t panel_dim_i; \ + dim_t panel_len_full; \ dim_t panel_len_i; \ + dim_t panel_len_max; \ dim_t panel_len_max_i; \ + dim_t panel_dim_i; \ + dim_t panel_dim_max; \ dim_t panel_off_i; \ inc_t vs_c; \ inc_t ldc; \ inc_t ldp, p_inc; \ dim_t* m_panel_full; \ dim_t* n_panel_full; \ + dim_t* m_panel_use; \ + dim_t* n_panel_use; \ dim_t* m_panel_max; \ dim_t* n_panel_max; \ conj_t conjc; \ \ ctype* restrict c_use; \ ctype* restrict p_use; \ - dim_t* m_panel_use; \ - dim_t* n_panel_use; \ - doff_t diagoffp; \ + doff_t diagoffp_i; \ \ \ - /* If C is zeros, then we don't need to pack it. */ \ - if ( bli_is_zeros( uploc ) ) return; \ + /* If C is zeros and part of a triangular matrix, then we don't need + to pack it. */ \ + if ( bli_is_zeros( uploc ) && \ + bli_is_triangular( strucc ) ) return; \ \ /* Extract the conjugation bit from the transposition argument. 
*/ \ conjc = bli_extract_conj( transc ); \ @@ -201,38 +203,38 @@ void PASTEMAC(ch,varname )( \ if ( bli_is_row_stored_f( rs_p, cs_p ) ) \ { \ /* Prepare to pack to row-stored column panels. */ \ - iter_dim = n; \ - panel_len = m; \ - panel_len_max = m_max; \ - panel_dim_max = pd_p; \ - ldc = rs_c; \ - vs_c = cs_c; \ - diagoffc_inc = -( doff_t)panel_dim_max; \ - ldp = rs_p; \ - m_panel_full = &m; \ - n_panel_full = &panel_dim_i; \ - m_panel_use = &panel_len_i; \ - n_panel_use = &panel_dim_i; \ - m_panel_max = &panel_len_max_i; \ - n_panel_max = &panel_dim_max; \ + iter_dim = n; \ + panel_len_full = m; \ + panel_len_max = m_max; \ + panel_dim_max = pd_p; \ + ldc = rs_c; \ + vs_c = cs_c; \ + diagoffc_inc = -( doff_t)panel_dim_max; \ + ldp = rs_p; \ + m_panel_full = &m; \ + n_panel_full = &panel_dim_i; \ + m_panel_use = &panel_len_i; \ + n_panel_use = &panel_dim_i; \ + m_panel_max = &panel_len_max_i; \ + n_panel_max = &panel_dim_max; \ } \ else /* if ( bli_is_col_stored_f( rs_p, cs_p ) ) */ \ { \ /* Prepare to pack to column-stored row panels. */ \ - iter_dim = m; \ - panel_len = n; \ - panel_len_max = n_max; \ - panel_dim_max = pd_p; \ - ldc = cs_c; \ - vs_c = rs_c; \ - diagoffc_inc = ( doff_t )panel_dim_max; \ - ldp = cs_p; \ - m_panel_full = &panel_dim_i; \ - n_panel_full = &n; \ - m_panel_use = &panel_dim_i; \ - n_panel_use = &panel_len_i; \ - m_panel_max = &panel_dim_max; \ - n_panel_max = &panel_len_max_i; \ + iter_dim = m; \ + panel_len_full = n; \ + panel_len_max = n_max; \ + panel_dim_max = pd_p; \ + ldc = cs_c; \ + vs_c = rs_c; \ + diagoffc_inc = ( doff_t )panel_dim_max; \ + ldp = cs_p; \ + m_panel_full = &panel_dim_i; \ + n_panel_full = &n; \ + m_panel_use = &panel_dim_i; \ + n_panel_use = &panel_len_i; \ + m_panel_max = &panel_dim_max; \ + n_panel_max = &panel_len_max_i; \ } \ \ /* Compute the total number of iterations we'll need. 
*/ \ @@ -240,8 +242,8 @@ void PASTEMAC(ch,varname )( \ \ /* Set the initial values and increments for indices related to C and P based on whether reverse iteration was requested. */ \ - if ( ( revifup && bli_is_upper( uploc ) ) || \ - ( reviflo && bli_is_lower( uploc ) ) ) \ + if ( ( revifup && bli_is_upper( uploc ) && bli_is_triangular( strucc ) ) || \ + ( reviflo && bli_is_lower( uploc ) && bli_is_triangular( strucc ) ) ) \ { \ ic0 = (num_iter - 1) * panel_dim_max; \ ic_inc = -panel_dim_max; \ @@ -266,19 +268,25 @@ void PASTEMAC(ch,varname )( \ diagoffc_i = diagoffc + (ip )*diagoffc_inc; \ c_begin = c_cast + (ic )*vs_c; \ \ - /* If the current panel is unstored, do nothing. (Notice that we use - the continue statement, so we don't even increment p_begin.) - If the current panel intersects the diagonal (and the matrix is - triangular), pack only as much as we need (ie: skip over as much - as possible on the unstored side of the diagonal). - Otherwise, we assume the current panel is full-length. */ \ - if ( bli_is_unstored_subpart_n( diagoffc_i, uploc, *m_panel_full, *n_panel_full ) ) \ + if ( bli_is_triangular( strucc ) && \ + bli_is_unstored_subpart_n( diagoffc_i, uploc, *m_panel_full, *n_panel_full ) ) \ { \ + /* This case executes if the panel belongs to a triangular + matrix AND is completely unstored (ie: zero). If the panel + is unstored, we do nothing. (Notice that we don't even + increment p_begin.) */ \ +\ continue; \ } \ - else if ( bli_intersects_diag_n( diagoffc_i, *m_panel_full, *n_panel_full ) && \ - bli_is_triangular( strucc ) ) \ + else if ( bli_is_triangular( strucc ) && \ + bli_intersects_diag_n( diagoffc_i, *m_panel_full, *n_panel_full ) ) \ { \ + /* This case executes if the panel belongs to a triangular + matrix AND is diagonal-intersecting. Notice that we + cannot bury the following conditional logic into + packm_tri_cxk() because we need to know the value of + panel_len_max_i so we can properly increment p_inc. */ \ +\ /* Sanity check. 
Diagonals should not intersect the short end of a micro-panel. If they do, then somehow the constraints on cache blocksizes being a whole multiple of the register @@ -293,22 +301,22 @@ void PASTEMAC(ch,varname )( \ panel_off_i = 0; \ panel_len_i = bli_abs( diagoffc_i ) + panel_dim_i; \ panel_len_max_i = bli_abs( diagoffc_i ) + panel_dim_max; \ - diagoffp = diagoffc_i; \ + diagoffp_i = diagoffc_i; \ } \ else /* if ( ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) || \ ( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) ) */ \ { \ panel_off_i = bli_abs( diagoffc_i ); \ - panel_len_i = panel_len - panel_off_i; \ - panel_len_max_i = panel_len_max - panel_off_i; \ - diagoffp = 0; \ + panel_len_i = panel_len_full - panel_off_i; \ + panel_len_max_i = panel_len_max - panel_off_i; \ + diagoffp_i = 0; \ } \ \ c_use = c_begin + (panel_off_i )*ldc; \ p_use = p_begin; \ \ PASTEMAC(ch,packm_tri_cxk)( strucc, \ - diagoffp, \ + diagoffp_i, \ diagc, \ uploc, \ conjc, \ @@ -324,9 +332,38 @@ void PASTEMAC(ch,varname )( \ \ p_inc = ldp * panel_len_max_i; \ } \ + else if ( bli_is_herm_or_symm( strucc ) ) \ + { \ + /* This case executes if the panel belongs to a Hermitian or + symmetric matrix, which includes stored, unstored, and + diagonal-intersecting panels. */ \ +\ + panel_len_i = panel_len_full; \ + panel_len_max_i = panel_len_max; \ +\ + PASTEMAC(ch,packm_herm_cxk)( strucc, \ + diagoffc_i, \ + uploc, \ + conjc, \ + *m_panel_use, \ + *n_panel_use, \ + *m_panel_max, \ + *n_panel_max, \ + kappa_cast, \ + c_begin, rs_c, cs_c, \ + p_begin, rs_p, cs_p ); \ +\ + /* NOTE: p_inc should be set to ps_p to properly support + BLIS_CONTIG_STRIDE_ALIGN_SIZE. */ \ + p_inc = ldp * panel_len_max_i; \ + } \ else \ { \ - panel_len_i = panel_len; \ + /* This case executes if the panel is general, or, if the + panel is part of a triangular matrix and is neither unstored + (ie: zero) nor diagonal-intersecting. 
*/ \ +\ + panel_len_i = panel_len_full; \ panel_len_max_i = panel_len_max; \ \ PASTEMAC(ch,packm_gen_cxk)( BLIS_GENERAL, \ @@ -337,27 +374,30 @@ void PASTEMAC(ch,varname )( \ *n_panel_use, \ *m_panel_max, \ *n_panel_max, \ - kappa, \ + kappa_cast, \ c_begin, rs_c, cs_c, \ p_begin, rs_p, cs_p ); \ \ + /* NOTE: p_inc should be set to ps_p to properly support + BLIS_CONTIG_STRIDE_ALIGN_SIZE. */ \ p_inc = ldp * panel_len_max_i; \ } \ \ +\ + p_begin += p_inc; \ + } \ +\ \ /* if ( rs_p == 1 ) \ - PASTEMAC(ch,fprintm)( stdout, "packm_var3: ap copied", panel_dim_max, panel_len_max_i, \ + PASTEMAC(ch,fprintm)( stdout, "packm_var1: ap copied", panel_dim_max, panel_len_max_i, \ p_begin, rs_p, cs_p, "%4.1f", "" ); \ if ( cs_p == 1 ) \ - PASTEMAC(ch,fprintm)( stdout, "packm_var3: bp copied", panel_len_max_i, panel_dim_max, \ + PASTEMAC(ch,fprintm)( stdout, "packm_var1: bp copied", panel_len_max_i, panel_dim_max, \ p_begin, rs_p, cs_p, "%4.1f", "" ); \ */ \ \ -\ - p_begin += p_inc; \ - } \ } -INSERT_GENTFUNC_BASIC( packm, packm_blk_var3 ) +INSERT_GENTFUNC_BASIC( packm, packm_blk_var1 ) diff --git a/frame/1m/packm/bli_packm_blk_var3.h b/frame/1m/packm/bli_packm_blk_var1.h similarity index 97% rename from frame/1m/packm/bli_packm_blk_var3.h rename to frame/1m/packm/bli_packm_blk_var1.h index 8821c373b..5a2c356a5 100644 --- a/frame/1m/packm/bli_packm_blk_var3.h +++ b/frame/1m/packm/bli_packm_blk_var1.h @@ -32,7 +32,7 @@ */ -void bli_packm_blk_var3( obj_t* c, +void bli_packm_blk_var1( obj_t* c, obj_t* p ); @@ -58,5 +58,5 @@ void PASTEMAC(ch,varname)( \ dim_t pd_p, inc_t ps_p \ ); -INSERT_GENTPROT_BASIC( packm_blk_var3 ) +INSERT_GENTPROT_BASIC( packm_blk_var1 ) diff --git a/frame/1m/packm/bli_packm_gen_cxk.c b/frame/1m/packm/bli_packm_gen_cxk.c index ec5e9d1d1..fc170b967 100644 --- a/frame/1m/packm/bli_packm_gen_cxk.c +++ b/frame/1m/packm/bli_packm_gen_cxk.c @@ -38,23 +38,20 @@ #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ - struc_t strucc, \ - doff_t 
diagoffc, \ - uplo_t uploc, \ - conj_t conjc, \ - dim_t m_panel, \ - dim_t n_panel, \ - dim_t m_panel_max, \ - dim_t n_panel_max, \ - void* kappa, \ - void* c, inc_t rs_c, inc_t cs_c, \ - void* p, inc_t rs_p, inc_t cs_p \ + struc_t strucc, \ + doff_t diagoffc, \ + uplo_t uploc, \ + conj_t conjc, \ + dim_t m_panel, \ + dim_t n_panel, \ + dim_t m_panel_max, \ + dim_t n_panel_max, \ + ctype* restrict kappa, \ + ctype* restrict c, inc_t rs_c, inc_t cs_c, \ + ctype* restrict p, inc_t rs_p, inc_t cs_p \ ) \ { \ - ctype* restrict c_begin = c; \ - ctype* restrict p_begin = p; \ - ctype* restrict kappa_cast = kappa; \ - ctype* restrict zero = PASTEMAC(ch,0); \ + ctype* restrict zero = PASTEMAC(ch,0); \ \ dim_t panel_dim; \ dim_t panel_len; \ @@ -84,29 +81,14 @@ void PASTEMAC(ch,varname)( \ ldp = cs_p; \ } \ \ - /* If the current panel is unstored, we need to make a few - adjustments so we refer to the data where it is actually - stored, also taking conjugation into account. (Note this - implicitly assumes we are operating on a dense panel - within a larger symmetric or Hermitian matrix, since a - general matrix would not contain any unstored region.) */ \ - if ( bli_is_unstored_subpart_n( diagoffc, uploc, m_panel, n_panel ) ) \ - { \ - c_begin = c_begin + diagoffc * ( doff_t )cs_c + \ - -diagoffc * ( doff_t )rs_c; \ - bli_swap_incs( incc, ldc ); \ -\ - if ( bli_is_hermitian( strucc ) ) \ - bli_toggle_conj( conjc ); \ - } \ \ /* Pack the panel. 
*/ \ PASTEMAC(ch,packm_cxk)( conjc, \ panel_dim, \ panel_len, \ - kappa_cast, \ - c_begin, incc, ldc, \ - p_begin, ldp ); \ + kappa, \ + c, incc, ldc, \ + p, ldp ); \ \ \ /* The packed memory region was acquired/allocated with "aligned" @@ -121,7 +103,7 @@ void PASTEMAC(ch,varname)( \ dim_t i = m_panel; \ dim_t m_edge = m_panel_max - i; \ dim_t n_edge = n_panel_max; \ - ctype* p_edge = p_begin + (i )*rs_p; \ + ctype* p_edge = p + (i )*rs_p; \ \ PASTEMAC2(ch,ch,setm_unb_var1)( 0, \ BLIS_NONUNIT_DIAG, \ @@ -137,7 +119,7 @@ void PASTEMAC(ch,varname)( \ dim_t j = n_panel; \ dim_t m_edge = m_panel_max; \ dim_t n_edge = n_panel_max - j; \ - ctype* p_edge = p_begin + (j )*cs_p; \ + ctype* p_edge = p + (j )*cs_p; \ \ PASTEMAC2(ch,ch,setm_unb_var1)( 0, \ BLIS_NONUNIT_DIAG, \ diff --git a/frame/1m/packm/bli_packm_gen_cxk.h b/frame/1m/packm/bli_packm_gen_cxk.h index 45d765a4e..8e610c37e 100644 --- a/frame/1m/packm/bli_packm_gen_cxk.h +++ b/frame/1m/packm/bli_packm_gen_cxk.h @@ -36,17 +36,17 @@ #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ - struc_t strucc, \ - doff_t diagoffc, \ - uplo_t uploc, \ - conj_t conjc, \ - dim_t m_panel, \ - dim_t n_panel, \ - dim_t m_panel_max, \ - dim_t n_panel_max, \ - void* kappa, \ - void* c, inc_t rs_c, inc_t cs_c, \ - void* p, inc_t rs_p, inc_t cs_p \ + struc_t strucc, \ + doff_t diagoffc, \ + uplo_t uploc, \ + conj_t conjc, \ + dim_t m_panel, \ + dim_t n_panel, \ + dim_t m_panel_max, \ + dim_t n_panel_max, \ + ctype* restrict kappa, \ + ctype* restrict c, inc_t rs_c, inc_t cs_c, \ + ctype* restrict p, inc_t rs_p, inc_t cs_p \ ); INSERT_GENTPROT_BASIC( packm_gen_cxk ) diff --git a/frame/1m/packm/bli_packm_herm_cxk.c b/frame/1m/packm/bli_packm_herm_cxk.c index 876268868..a6ed0506f 100644 --- a/frame/1m/packm/bli_packm_herm_cxk.c +++ b/frame/1m/packm/bli_packm_herm_cxk.c @@ -38,23 +38,20 @@ #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ - struc_t strucc, \ - doff_t diagoffc, \ - uplo_t uploc, \ 
- conj_t conjc, \ - dim_t m_panel, \ - dim_t n_panel, \ - dim_t m_panel_max, \ - dim_t n_panel_max, \ - void* kappa, \ - void* c, inc_t rs_c, inc_t cs_c, \ - void* p, inc_t rs_p, inc_t cs_p \ + struc_t strucc, \ + doff_t diagoffc, \ + uplo_t uploc, \ + conj_t conjc, \ + dim_t m_panel, \ + dim_t n_panel, \ + dim_t m_panel_max, \ + dim_t n_panel_max, \ + ctype* restrict kappa, \ + ctype* restrict c, inc_t rs_c, inc_t cs_c, \ + ctype* restrict p, inc_t rs_p, inc_t cs_p \ ) \ { \ - ctype* restrict c_begin = c; \ - ctype* restrict p_begin = p; \ - ctype* restrict kappa_cast = kappa; \ - ctype* restrict zero = PASTEMAC(ch,0); \ + ctype* restrict zero = PASTEMAC(ch,0); \ \ dim_t i, j; \ dim_t panel_len; \ @@ -110,122 +107,151 @@ void PASTEMAC(ch,varname)( \ cs_p11 = cs_p; \ } \ \ - diagoffc_abs = bli_abs( diagoffc ); \ -\ - /* Sanity check. Diagonals should not intersect the short end of - a micro-panel. If they do, then somehow the constraints on - cache blocksizes being a whole multiple of the register - blocksizes was somehow violated. 
*/ \ - if ( ( bli_is_col_stored_f( rs_p, cs_p ) && diagoffc < 0 ) || \ - ( bli_is_row_stored_f( rs_p, cs_p ) && diagoffc > 0 ) ) \ - bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ -\ - if ( ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) || \ - ( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) ) \ + if ( !bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) \ { \ - p10_dim = panel_dim; \ - p10_len = diagoffc_abs; \ - p10 = p_begin; \ - c10 = c_begin; \ - incc10 = incc; \ - ldc10 = ldc; \ - conjc10 = conjc; \ -\ - p12_dim = panel_dim; \ - p12_len = panel_len - p10_len; \ - j = p10_len; \ - diagoffc12 = diagoffc_abs - j; \ - p12 = p_begin + (j )*ldp; \ - c12 = c_begin + (j )*ldc; \ - c12 = c12 + diagoffc12 * ( doff_t )cs_c + \ - -diagoffc12 * ( doff_t )rs_c; \ - incc12 = ldc; \ - ldc12 = incc; \ - conjc12 = conjc; \ -\ - p11_m = panel_dim; \ - p11_n = panel_dim; \ - j = diagoffc_abs; \ - p11 = p_begin + (j )*ldp; \ - c11 = c_begin + (j )*ldc; \ -\ - if ( bli_is_hermitian( strucc ) ) \ - bli_toggle_conj( conjc12 ); \ - } \ - else /* if ( ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) || \ - ( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) ) */ \ - { \ - p10_dim = panel_dim; \ - p10_len = diagoffc_abs + panel_dim; \ - diagoffc10 = diagoffc; \ - p10 = p_begin; \ - c10 = c_begin; \ - c10 = c10 + diagoffc10 * ( doff_t )cs_c + \ - -diagoffc10 * ( doff_t )rs_c; \ - incc10 = ldc; \ - ldc10 = incc; \ - conjc10 = conjc; \ -\ - p12_dim = panel_dim; \ - p12_len = panel_len - p10_len; \ - j = p10_len; \ - p12 = p_begin + (j )*ldp; \ - c12 = c_begin + (j )*ldc; \ - incc12 = incc; \ - ldc12 = ldc; \ - conjc12 = conjc; \ -\ - p11_m = panel_dim; \ - p11_n = panel_dim; \ - j = diagoffc_abs; \ - p11 = p_begin + (j )*ldp; \ - c11 = c_begin + (j )*ldc; \ -\ - if ( bli_is_hermitian( strucc ) ) \ - bli_toggle_conj( conjc10 ); \ - } \ -\ - /* Pack to P10. For upper storage, this includes the unstored - triangle of C11. 
*/ \ - PASTEMAC(ch,packm_cxk)( conjc10, \ - p10_dim, \ - p10_len, \ - kappa_cast, \ - c10, incc10, ldc10, \ - p10, ldp ); \ -\ - /* Pack to P12. For lower storage, this includes the unstored - triangle of C11. */ \ - PASTEMAC(ch,packm_cxk)( conjc12, \ - p12_dim, \ - p12_len, \ - kappa_cast, \ - c12, incc12, ldc12, \ - p12, ldp ); \ -\ - /* Pack the stored triangule of C11 to P11. */ \ - PASTEMAC3(ch,ch,ch,scal2m_unb_var1)( 0, \ - BLIS_NONUNIT_DIAG, \ - uploc, \ - conjc, \ - p11_m, \ - p11_n, \ - kappa_cast, \ - c11, rs_c, cs_c, \ - p11, rs_p11, cs_p11 ); \ -\ - /* If source matrix C is Hermitian, we have to zero out the - imaginary components of the diagonal of P11 in case the - corresponding elements in C11 were not already zero. */ \ - if ( bli_is_hermitian( strucc ) ) \ - { \ - /* NOTE: We can directly increment p11 since we are done - using p11 for the remainder of the function. */ \ - for ( i = 0; i < p11_m; ++i ) \ + /* If the current panel is unstored, we need to make a few + adjustments so we refer to the data where it is actually + stored, also taking conjugation into account. (Note this + implicitly assumes we are operating on a dense panel + within a larger symmetric or Hermitian matrix, since a + general matrix would not contain any unstored region.) */ \ + if ( bli_is_unstored_subpart_n( diagoffc, uploc, m_panel, n_panel ) ) \ { \ - PASTEMAC(ch,seti0s)( *p11 ); \ + c = c + diagoffc * ( doff_t )cs_c + \ + -diagoffc * ( doff_t )rs_c; \ + bli_swap_incs( incc, ldc ); \ \ - p11 += rs_p11 + cs_p11; \ + if ( bli_is_hermitian( strucc ) ) \ + bli_toggle_conj( conjc ); \ + } \ +\ + /* Pack the full panel. */ \ + PASTEMAC(ch,packm_cxk)( conjc, \ + panel_dim, \ + panel_len, \ + kappa, \ + c, incc, ldc, \ + p, ldp ); \ + } \ + else /* if ( bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) */ \ + { \ + /* Sanity check. Diagonals should not intersect the short end of + a micro-panel. 
If they do, then somehow the constraints on + cache blocksizes being a whole multiple of the register + blocksizes was somehow violated. */ \ + if ( ( bli_is_col_stored_f( rs_p, cs_p ) && diagoffc < 0 ) || \ + ( bli_is_row_stored_f( rs_p, cs_p ) && diagoffc > 0 ) ) \ + bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ +\ + diagoffc_abs = bli_abs( diagoffc ); \ +\ + if ( ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) || \ + ( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) ) \ + { \ + p10_dim = panel_dim; \ + p10_len = diagoffc_abs; \ + p10 = p; \ + c10 = c; \ + incc10 = incc; \ + ldc10 = ldc; \ + conjc10 = conjc; \ +\ + p12_dim = panel_dim; \ + p12_len = panel_len - p10_len; \ + j = p10_len; \ + diagoffc12 = diagoffc_abs - j; \ + p12 = p + (j )*ldp; \ + c12 = c + (j )*ldc; \ + c12 = c12 + diagoffc12 * ( doff_t )cs_c + \ + -diagoffc12 * ( doff_t )rs_c; \ + incc12 = ldc; \ + ldc12 = incc; \ + conjc12 = conjc; \ +\ + p11_m = panel_dim; \ + p11_n = panel_dim; \ + j = diagoffc_abs; \ + p11 = p + (j )*ldp; \ + c11 = c + (j )*ldc; \ +\ + if ( bli_is_hermitian( strucc ) ) \ + bli_toggle_conj( conjc12 ); \ + } \ + else /* if ( ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) || \ + ( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) ) */ \ + { \ + p10_dim = panel_dim; \ + p10_len = diagoffc_abs + panel_dim; \ + diagoffc10 = diagoffc; \ + p10 = p; \ + c10 = c; \ + c10 = c10 + diagoffc10 * ( doff_t )cs_c + \ + -diagoffc10 * ( doff_t )rs_c; \ + incc10 = ldc; \ + ldc10 = incc; \ + conjc10 = conjc; \ +\ + p12_dim = panel_dim; \ + p12_len = panel_len - p10_len; \ + j = p10_len; \ + p12 = p + (j )*ldp; \ + c12 = c + (j )*ldc; \ + incc12 = incc; \ + ldc12 = ldc; \ + conjc12 = conjc; \ +\ + p11_m = panel_dim; \ + p11_n = panel_dim; \ + j = diagoffc_abs; \ + p11 = p + (j )*ldp; \ + c11 = c + (j )*ldc; \ +\ + if ( bli_is_hermitian( strucc ) ) \ + bli_toggle_conj( conjc10 ); \ + } \ +\ + /* Pack to P10. 
For upper storage, this includes the unstored + triangle of C11. */ \ + PASTEMAC(ch,packm_cxk)( conjc10, \ + p10_dim, \ + p10_len, \ + kappa, \ + c10, incc10, ldc10, \ + p10, ldp ); \ +\ + /* Pack to P12. For lower storage, this includes the unstored + triangle of C11. */ \ + PASTEMAC(ch,packm_cxk)( conjc12, \ + p12_dim, \ + p12_len, \ + kappa, \ + c12, incc12, ldc12, \ + p12, ldp ); \ +\ + /* Pack the stored triangule of C11 to P11. */ \ + PASTEMAC3(ch,ch,ch,scal2m_unb_var1)( 0, \ + BLIS_NONUNIT_DIAG, \ + uploc, \ + conjc, \ + p11_m, \ + p11_n, \ + kappa, \ + c11, rs_c, cs_c, \ + p11, rs_p11, cs_p11 ); \ +\ + /* If source matrix C is Hermitian, we have to zero out the + imaginary components of the diagonal of P11 in case the + corresponding elements in C11 were not already zero. */ \ + if ( bli_is_hermitian( strucc ) ) \ + { \ + /* NOTE: We can directly increment p11 since we are done + using p11 for the remainder of the function. */ \ + for ( i = 0; i < p11_m; ++i ) \ + { \ + PASTEMAC(ch,seti0s)( *p11 ); \ +\ + p11 += rs_p11 + cs_p11; \ + } \ } \ } \ \ @@ -241,7 +267,7 @@ void PASTEMAC(ch,varname)( \ dim_t i = m_panel; \ dim_t m_edge = m_panel_max - i; \ dim_t n_edge = n_panel_max; \ - ctype* p_edge = p_begin + (i )*rs_p; \ + ctype* p_edge = p + (i )*rs_p; \ \ PASTEMAC2(ch,ch,setm_unb_var1)( 0, \ BLIS_NONUNIT_DIAG, \ @@ -257,7 +283,7 @@ void PASTEMAC(ch,varname)( \ dim_t j = n_panel; \ dim_t m_edge = m_panel_max; \ dim_t n_edge = n_panel_max - j; \ - ctype* p_edge = p_begin + (j )*cs_p; \ + ctype* p_edge = p + (j )*cs_p; \ \ PASTEMAC2(ch,ch,setm_unb_var1)( 0, \ BLIS_NONUNIT_DIAG, \ diff --git a/frame/1m/packm/bli_packm_herm_cxk.h b/frame/1m/packm/bli_packm_herm_cxk.h index 209f49d27..1d1a43d2a 100644 --- a/frame/1m/packm/bli_packm_herm_cxk.h +++ b/frame/1m/packm/bli_packm_herm_cxk.h @@ -36,17 +36,17 @@ #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ - struc_t strucc, \ - doff_t diagoffc, \ - uplo_t uploc, \ - conj_t conjc, \ - dim_t 
m_panel, \ - dim_t n_panel, \ - dim_t m_panel_max, \ - dim_t n_panel_max, \ - void* kappa, \ - void* c, inc_t rs_c, inc_t cs_c, \ - void* p, inc_t rs_p, inc_t cs_p \ + struc_t strucc, \ + doff_t diagoffc, \ + uplo_t uploc, \ + conj_t conjc, \ + dim_t m_panel, \ + dim_t n_panel, \ + dim_t m_panel_max, \ + dim_t n_panel_max, \ + ctype* restrict kappa, \ + ctype* restrict c, inc_t rs_c, inc_t cs_c, \ + ctype* restrict p, inc_t rs_p, inc_t cs_p \ ); INSERT_GENTPROT_BASIC( packm_herm_cxk ) diff --git a/frame/1m/packm/bli_packm_int.c b/frame/1m/packm/bli_packm_int.c index f048b5fa0..1f41bbbe8 100644 --- a/frame/1m/packm/bli_packm_int.c +++ b/frame/1m/packm/bli_packm_int.c @@ -42,9 +42,9 @@ typedef void (*FUNCPTR_T)( obj_t* a, static FUNCPTR_T vars[6][3] = { // unblocked optimized unblocked blocked - { bli_packm_unb_var1, NULL, NULL, }, - { NULL, NULL, bli_packm_blk_var2 }, - { NULL, NULL, bli_packm_blk_var3 }, + { bli_packm_unb_var1, NULL, bli_packm_blk_var1 }, + { NULL, NULL, NULL, }, + { NULL, NULL, NULL, }, { NULL, NULL, NULL, }, { NULL, NULL, NULL, }, { NULL, NULL, NULL, }, diff --git a/frame/1m/packm/bli_packm_tri_cxk.c b/frame/1m/packm/bli_packm_tri_cxk.c index f72ab99e5..128b24461 100644 --- a/frame/1m/packm/bli_packm_tri_cxk.c +++ b/frame/1m/packm/bli_packm_tri_cxk.c @@ -38,25 +38,22 @@ #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ - struc_t strucc, \ - doff_t diagoffp, \ - diag_t diagc, \ - uplo_t uploc, \ - conj_t conjc, \ - bool_t invdiag, \ - dim_t m_panel, \ - dim_t n_panel, \ - dim_t m_panel_max, \ - dim_t n_panel_max, \ - ctype* kappa, \ - ctype* c, inc_t rs_c, inc_t cs_c, \ - ctype* p, inc_t rs_p, inc_t cs_p \ + struc_t strucc, \ + doff_t diagoffp, \ + diag_t diagc, \ + uplo_t uploc, \ + conj_t conjc, \ + bool_t invdiag, \ + dim_t m_panel, \ + dim_t n_panel, \ + dim_t m_panel_max, \ + dim_t n_panel_max, \ + ctype* restrict kappa, \ + ctype* restrict c, inc_t rs_c, inc_t cs_c, \ + ctype* restrict p, inc_t rs_p, inc_t cs_p \ ) \ { 
\ - ctype* restrict c_begin = c; \ - ctype* restrict p_begin = p; \ - ctype* restrict kappa_cast = kappa; \ - ctype* restrict zero = PASTEMAC(ch,0); \ + ctype* restrict zero = PASTEMAC(ch,0); \ \ dim_t panel_dim; \ dim_t panel_len; \ @@ -90,9 +87,9 @@ void PASTEMAC(ch,varname)( \ PASTEMAC(ch,packm_cxk)( conjc, \ panel_dim, \ panel_len, \ - kappa_cast, \ - c_begin, incc, ldc, \ - p_begin, ldp ); \ + kappa, \ + c, incc, ldc, \ + p, ldp ); \ \ /* If the diagonal of C is implicitly unit, set the diagonal of the packed panel to unit. */ \ @@ -101,8 +98,8 @@ void PASTEMAC(ch,varname)( \ PASTEMAC2(ch,ch,setd_unb_var1)( diagoffp, \ m_panel, \ n_panel, \ - kappa_cast, \ - p_begin, rs_p, cs_p ); \ + kappa, \ + p, rs_p, cs_p ); \ } \ \ /* If requested, invert the diagonal of the packed panel. */ \ @@ -111,7 +108,7 @@ void PASTEMAC(ch,varname)( \ PASTEMAC(ch,invertd_unb_var1)( diagoffp, \ m_panel, \ n_panel, \ - p_begin, rs_p, cs_p ); \ + p, rs_p, cs_p ); \ } \ \ /* Set the region opposite the diagonal of P to zero. 
To do this, @@ -131,7 +128,7 @@ void PASTEMAC(ch,varname)( \ m_panel, \ n_panel, \ zero, \ - p_begin, rs_p, cs_p ); \ + p, rs_p, cs_p ); \ } \ \ /* The packed memory region was acquired/allocated with "aligned" @@ -146,7 +143,7 @@ void PASTEMAC(ch,varname)( \ dim_t i = m_panel; \ dim_t m_edge = m_panel_max - i; \ dim_t n_edge = n_panel_max; \ - ctype* p_edge = p_begin + (i )*rs_p; \ + ctype* p_edge = p + (i )*rs_p; \ \ PASTEMAC2(ch,ch,setm_unb_var1)( 0, \ BLIS_NONUNIT_DIAG, \ @@ -162,7 +159,7 @@ void PASTEMAC(ch,varname)( \ dim_t j = n_panel; \ dim_t m_edge = m_panel_max; \ dim_t n_edge = n_panel_max - j; \ - ctype* p_edge = p_begin + (j )*cs_p; \ + ctype* p_edge = p + (j )*cs_p; \ \ PASTEMAC2(ch,ch,setm_unb_var1)( 0, \ BLIS_NONUNIT_DIAG, \ @@ -184,7 +181,7 @@ void PASTEMAC(ch,varname)( \ dim_t m_br = m_panel_max - i; \ dim_t n_br = n_panel_max - j; \ ctype* one = PASTEMAC(ch,1); \ - ctype* p_edge = p_begin + (i )*rs_p + (j )*cs_p; \ + ctype* p_edge = p + (i )*rs_p + (j )*cs_p; \ \ PASTEMAC2(ch,ch,setd_unb_var1)( 0, \ m_br, \ @@ -199,10 +196,10 @@ void PASTEMAC(ch,varname)( \ /* if ( rs_p == 1 ) \ PASTEMAC(ch,fprintm)( stdout, "packm_var3: ap copied", m_panel_max, n_panel_max, \ - p_begin, rs_p, cs_p, "%4.1f", "" ); \ + p, rs_p, cs_p, "%4.1f", "" ); \ if ( cs_p == 1 ) \ PASTEMAC(ch,fprintm)( stdout, "packm_var3: bp copied", m_panel_max, n_panel_max, \ - p_begin, rs_p, cs_p, "%4.1f", "" ); \ + p, rs_p, cs_p, "%4.1f", "" ); \ */ \ } diff --git a/frame/1m/packm/bli_packm_tri_cxk.h b/frame/1m/packm/bli_packm_tri_cxk.h index 724d31642..67fa87ed6 100644 --- a/frame/1m/packm/bli_packm_tri_cxk.h +++ b/frame/1m/packm/bli_packm_tri_cxk.h @@ -36,19 +36,19 @@ #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ - struc_t strucc, \ - doff_t diagoffc, \ - diag_t diagc, \ - uplo_t uploc, \ - conj_t conjc, \ - bool_t invdiag, \ - dim_t m_panel, \ - dim_t n_panel, \ - dim_t m_panel_max, \ - dim_t n_panel_max, \ - ctype* kappa, \ - ctype* c, inc_t rs_c, inc_t 
cs_c, \ - ctype* p, inc_t rs_p, inc_t cs_p \ + struc_t strucc, \ + doff_t diagoffp, \ + diag_t diagc, \ + uplo_t uploc, \ + conj_t conjc, \ + bool_t invdiag, \ + dim_t m_panel, \ + dim_t n_panel, \ + dim_t m_panel_max, \ + dim_t n_panel_max, \ + ctype* restrict kappa, \ + ctype* restrict c, inc_t rs_c, inc_t cs_c, \ + ctype* restrict p, inc_t rs_p, inc_t cs_p \ ); INSERT_GENTPROT_BASIC( packm_tri_cxk ) diff --git a/frame/1m/packm/bli_packm_blk_var2.c b/frame/1m/packm/old/bli_packm_blk_var2.c similarity index 96% rename from frame/1m/packm/bli_packm_blk_var2.c rename to frame/1m/packm/old/bli_packm_blk_var2.c index 5f1ff0542..38a6a4622 100644 --- a/frame/1m/packm/bli_packm_blk_var2.c +++ b/frame/1m/packm/old/bli_packm_blk_var2.c @@ -218,12 +218,10 @@ void PASTEMAC(ch,varname )( \ c_begin = c_cast + (ic )*vs_c; \ p_begin = p_cast + (ip )*ps_p; \ \ - /* If the current panel intersects the diagonal and C is either - upper- or lower-stored, then we assume C is symmetric or - Hermitian and that it must be densified. - Otherwise, we pack the panel all at once. */ \ - if ( bli_intersects_diag_n( diagoffc_i, *m_panel, *n_panel ) && \ - bli_is_upper_or_lower( uploc ) ) \ + /* Call a specialized packm kernel wrapper for Hermitian and + symmetric matrices. Otherwise, call the kernel wrapper for + general matrices. 
*/ \ + if ( bli_is_herm_or_symm( strucc ) ) \ { \ PASTEMAC(ch,packm_herm_cxk)( strucc, \ diagoffc_i, \ @@ -237,7 +235,7 @@ void PASTEMAC(ch,varname )( \ c_begin, rs_c, cs_c, \ p_begin, rs_p, cs_p ); \ } \ - else \ + else /* if ( bli_is_general( strucc ) ) */ \ { \ PASTEMAC(ch,packm_gen_cxk)( strucc, \ diagoffc_i, \ diff --git a/frame/1m/packm/bli_packm_blk_var2.h b/frame/1m/packm/old/bli_packm_blk_var2.h similarity index 100% rename from frame/1m/packm/bli_packm_blk_var2.h rename to frame/1m/packm/old/bli_packm_blk_var2.h diff --git a/frame/3/gemm/bli_gemm_cntl.c b/frame/3/gemm/bli_gemm_cntl.c index a86d7e5d6..53dcf86be 100644 --- a/frame/3/gemm/bli_gemm_cntl.c +++ b/frame/3/gemm/bli_gemm_cntl.c @@ -99,7 +99,7 @@ void bli_gemm_cntl_init() gemm_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, + BLIS_VARIANT1, gemm_mr, gemm_kr, TRUE, // densify; used by hemm/symm @@ -112,7 +112,7 @@ void bli_gemm_cntl_init() gemm_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, + BLIS_VARIANT1, gemm_kr, gemm_nr, TRUE, // densify; used by hemm/symm diff --git a/frame/3/herk/bli_herk_cntl.c b/frame/3/herk/bli_herk_cntl.c index 3340003a3..f8555c870 100644 --- a/frame/3/herk/bli_herk_cntl.c +++ b/frame/3/herk/bli_herk_cntl.c @@ -62,7 +62,7 @@ void bli_herk_cntl_init() herk_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, + BLIS_VARIANT1, gemm_mr, gemm_kr, FALSE, // already dense; densify not necessary @@ -75,7 +75,7 @@ void bli_herk_cntl_init() herk_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, + BLIS_VARIANT1, gemm_kr, gemm_nr, FALSE, // already dense; densify not necessary diff --git a/frame/3/trmm/bli_trmm_cntl.c b/frame/3/trmm/bli_trmm_cntl.c index 06dbc0b69..a6f1acc5d 100644 --- a/frame/3/trmm/bli_trmm_cntl.c +++ b/frame/3/trmm/bli_trmm_cntl.c @@ -73,7 +73,7 @@ void bli_trmm_cntl_init() trmm_l_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, // pack panels of A compactly 
+ BLIS_VARIANT1, // pack panels of A compactly // IMPORTANT: for consistency with trsm, "k" dim // multiple is set to mr. gemm_mr, @@ -88,7 +88,7 @@ void bli_trmm_cntl_init() trmm_l_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, + BLIS_VARIANT1, // IMPORTANT: m dim multiple here must be mr // since "k" dim multiple is set to mr above. gemm_mr, @@ -104,7 +104,7 @@ void bli_trmm_cntl_init() trmm_r_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, + BLIS_VARIANT1, // IMPORTANT: for consistency with trsm, "k" dim // multiple is set to nr. gemm_mr, @@ -119,7 +119,7 @@ void bli_trmm_cntl_init() trmm_r_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, // pack panels of B compactly + BLIS_VARIANT1, // pack panels of B compactly // IMPORTANT: m dim multiple here must be nr // since "k" dim multiple is set to nr above. gemm_nr, diff --git a/frame/3/trsm/bli_trsm_cntl.c b/frame/3/trsm/bli_trsm_cntl.c index c856a43f3..ae283c50c 100644 --- a/frame/3/trsm/bli_trsm_cntl.c +++ b/frame/3/trsm/bli_trsm_cntl.c @@ -88,7 +88,7 @@ void bli_trsm_cntl_init() trsm_l_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, // pack panels of A compactly + BLIS_VARIANT1, // pack panels of A compactly // IMPORTANT: n dim multiple must be mr to // support right and bottom-right edge cases gemm_mr, @@ -103,7 +103,7 @@ void bli_trsm_cntl_init() trsm_l_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, + BLIS_VARIANT1, // IMPORTANT: m dim multiple must be mr since // B_pack is updated (ie: serves as C) in trsm gemm_mr, @@ -119,7 +119,7 @@ void bli_trsm_cntl_init() trsm_r_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, + BLIS_VARIANT1, gemm_nr, gemm_mr, FALSE, // already dense; densify not necessary @@ -132,7 +132,7 @@ void bli_trsm_cntl_init() trsm_r_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, // pack panels of B compactly + BLIS_VARIANT1, // pack 
panels of B compactly gemm_mr, gemm_mr, TRUE, // densify diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index 83d51e34f..593a2611a 100644 --- a/frame/include/bli_param_macro_defs.h +++ b/frame/include/bli_param_macro_defs.h @@ -190,6 +190,12 @@ \ ( struc == BLIS_TRIANGULAR ) +#define bli_is_herm_or_symm( struc ) \ +\ + ( bli_is_hermitian( struc ) || \ + bli_is_symmetric( struc ) ) + + // conj diff --git a/testsuite/src/test_gemmtrsm_ukr.c b/testsuite/src/test_gemmtrsm_ukr.c index 27f3efe63..271392189 100644 --- a/testsuite/src/test_gemmtrsm_ukr.c +++ b/testsuite/src/test_gemmtrsm_ukr.c @@ -251,10 +251,10 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, &b, &bp ); // Pack the contents of a to ap. - bli_packm_blk_var3( &a, &ap ); + bli_packm_blk_var1( &a, &ap ); // Pack the contents of b to bp. - bli_packm_blk_var2( &b, &bp ); + bli_packm_blk_var1( &b, &bp ); // Create subpartitions from the a and b panels. diff --git a/testsuite/src/test_trsm_ukr.c b/testsuite/src/test_trsm_ukr.c index 724029903..61b4dd28f 100644 --- a/testsuite/src/test_trsm_ukr.c +++ b/testsuite/src/test_trsm_ukr.c @@ -217,7 +217,7 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params, &b, &bp ); // Pack the contents of a to ap. - bli_packm_blk_var3( &a, &ap ); + bli_packm_blk_var1( &a, &ap ); // Repeat the experiment n_repeats times and record results.