diff --git a/frame/1m/packm/bli_packm_cntl.c b/frame/1m/packm/bli_packm_cntl.c index 7fdc2a2fd..32e134c5c 100644 --- a/frame/1m/packm/bli_packm_cntl.c +++ b/frame/1m/packm/bli_packm_cntl.c @@ -34,18 +34,13 @@ #include "blis.h" -packm_t* packm_cntl_row_noscale; -packm_t* packm_cntl_row_scale; -packm_t* packm_cntl_col_noscale; -packm_t* packm_cntl_col_scale; +packm_t* packm_cntl_row; +packm_t* packm_cntl_col; -packm_t* packm_cntl_rpn_noscale; -packm_t* packm_cntl_rpn_scale; -packm_t* packm_cntl_cpn_noscale; -packm_t* packm_cntl_cpn_scale; +packm_t* packm_cntl_rpn; +packm_t* packm_cntl_cpn; -packm_t* packm_cntl_noscale; -packm_t* packm_cntl_scale; +packm_t* packm_cntl; blksz_t* packm_mult_ldim; blksz_t* packm_mult_nvec; @@ -87,27 +82,13 @@ void bli_packm_cntl_init() // with structure, though they can also be used on matrices that // are already dense and/or have no structure. - // Create control trees to pack by rows (with and without scaling). - packm_cntl_row_noscale + // Create control trees to pack by rows. + packm_cntl_row = bli_packm_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1, // When packing to rows: packm_mult_nvec, // - nvec multiple is used for m dimension packm_mult_ldim, // - ldim multiple is used for n dimension - FALSE, // do NOT scale - FALSE, // do NOT densify structure - FALSE, // do NOT invert diagonal - FALSE, // do NOT iterate backwards if upper - FALSE, // do NOT iterate backwards if lower - BLIS_PACKED_ROWS, - BLIS_BUFFER_FOR_GEN_USE ); - packm_cntl_row_scale - = - bli_packm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, // When packing to rows: - packm_mult_nvec, // - nvec multiple is used for m dimension - packm_mult_ldim, // - ldim multiple is used for n dimension - TRUE, // do scale FALSE, // do NOT densify structure FALSE, // do NOT invert diagonal FALSE, // do NOT iterate backwards if upper @@ -116,27 +97,13 @@ void bli_packm_cntl_init() BLIS_BUFFER_FOR_GEN_USE ); - // Create control trees to pack by columns (with and without scaling). - packm_cntl_col_noscale + // Create control trees to pack by columns. + packm_cntl_col = bli_packm_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1, // When packing to columns: packm_mult_ldim, // - ldim multiple is used for m dimension packm_mult_nvec, // - nvec multiple is used for n dimension - FALSE, // do NOT scale - FALSE, // do NOT densify structure - FALSE, // do NOT invert diagonal - FALSE, // do NOT iterate backwards if upper - FALSE, // do NOT iterate backwards if lower - BLIS_PACKED_COLUMNS, - BLIS_BUFFER_FOR_GEN_USE ); - packm_cntl_col_scale - = - bli_packm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, // When packing to columns: - packm_mult_ldim, // - ldim multiple is used for m dimension - packm_mult_nvec, // - nvec multiple is used for n dimension - TRUE, // do scale FALSE, // do NOT densify structure FALSE, // do NOT invert diagonal FALSE, // do NOT iterate backwards if upper @@ -147,16 +114,13 @@ void bli_packm_cntl_init() // Set defaults when we don't care whether the packing is by rows or // by columns. - packm_cntl_noscale = packm_cntl_col_noscale; - packm_cntl_scale = packm_cntl_col_scale; + packm_cntl = packm_cntl_col; } void bli_packm_cntl_finalize() { - bli_cntl_obj_free( packm_cntl_row_noscale ); - bli_cntl_obj_free( packm_cntl_row_scale ); - bli_cntl_obj_free( packm_cntl_col_noscale ); - bli_cntl_obj_free( packm_cntl_col_scale ); + bli_cntl_obj_free( packm_cntl_row ); + bli_cntl_obj_free( packm_cntl_col ); bli_blksz_obj_free( packm_mult_ldim ); bli_blksz_obj_free( packm_mult_nvec ); @@ -166,7 +130,6 @@ packm_t* bli_packm_cntl_obj_create( impl_t impl_type, varnum_t var_num, blksz_t* mr, blksz_t* nr, - bool_t does_scale, bool_t does_densify, bool_t does_invert_diag, bool_t rev_iter_if_upper, @@ -182,7 +145,6 @@ packm_t* bli_packm_cntl_obj_create( impl_t impl_type, cntl->var_num = var_num; cntl->mr = mr; cntl->nr = nr; - cntl->does_scale = does_scale; cntl->does_densify = does_densify; cntl->does_invert_diag = does_invert_diag; cntl->rev_iter_if_upper = rev_iter_if_upper; @@ -198,7 +160,6 @@ void bli_packm_cntl_obj_init( packm_t* cntl, varnum_t var_num, blksz_t* mr, blksz_t* nr, - bool_t does_scale, bool_t does_densify, bool_t does_invert_diag, bool_t rev_iter_if_upper, @@ -210,7 +171,6 @@ void bli_packm_cntl_obj_init( packm_t* cntl, cntl->var_num = var_num; cntl->mr = mr; cntl->nr = nr; - cntl->does_scale = does_scale; cntl->does_densify = does_densify; cntl->does_invert_diag = does_invert_diag; cntl->rev_iter_if_upper = rev_iter_if_upper; diff --git a/frame/1m/packm/bli_packm_cntl.h b/frame/1m/packm/bli_packm_cntl.h index 50079431b..747da6123 100644 --- a/frame/1m/packm/bli_packm_cntl.h +++ b/frame/1m/packm/bli_packm_cntl.h @@ -38,7 +38,6 @@ struct packm_s varnum_t var_num; blksz_t* mr; blksz_t* nr; - bool_t does_scale; bool_t does_densify; bool_t does_invert_diag; bool_t rev_iter_if_upper; @@ -51,7 +50,6 @@ typedef struct packm_s packm_t; #define cntl_mr( cntl ) cntl->mr #define cntl_nr( cntl ) cntl->nr -#define cntl_does_scale( cntl ) cntl->does_scale #define cntl_does_densify( cntl ) cntl->does_densify #define cntl_does_invert_diag( cntl ) cntl->does_invert_diag #define cntl_rev_iter_if_upper( cntl ) cntl->rev_iter_if_upper @@ -73,7 +71,6 @@ packm_t* bli_packm_cntl_obj_create( impl_t impl_type, varnum_t var_num, blksz_t* mr_def, blksz_t* nr_def, - bool_t does_scale, bool_t does_densify, bool_t does_invert_diag, bool_t rev_iter_if_upper, @@ -85,7 +82,6 @@ void bli_packm_cntl_obj_init( packm_t* cntl, varnum_t var_num, blksz_t* mr_def, blksz_t* nr_def, - bool_t does_scale, bool_t does_densify, bool_t does_invert_diag, bool_t rev_iter_if_upper, diff --git a/frame/1m/packm/bli_packm_init.c b/frame/1m/packm/bli_packm_init.c index ebf5575cc..9f10bd7aa 100644 --- a/frame/1m/packm/bli_packm_init.c +++ b/frame/1m/packm/bli_packm_init.c @@ -185,8 +185,10 @@ void bli_packm_init_pack( bool_t densify, void* buf; - // We begin by copying the basic fields of c. - bli_obj_alias_to( *c, *p ); + // We begin by copying the basic fields of c. We do NOT copy the + // pack_mem entry from c because the entry in p may be cached from + // a previous iteration, and thus we don't want to overwrite it. + bli_obj_alias_for_packing( *c, *p ); // Update the dimension fields to explicitly reflect a transposition, // if needed. diff --git a/frame/1m/packm/bli_packm_int.c b/frame/1m/packm/bli_packm_int.c index 4dbec2beb..6d06fb98c 100644 --- a/frame/1m/packm/bli_packm_int.c +++ b/frame/1m/packm/bli_packm_int.c @@ -110,8 +110,9 @@ void bli_packm_int( obj_t* beta, // to be non-unit even when no scaling is prescribed. If the control tree // indicates no scaling, then make sure that BLIS_ONE is passed into the // packm implementation. - if ( cntl_does_scale( cntl ) ) beta_use = beta; - else beta_use = &BLIS_ONE; + //if ( cntl_does_scale( cntl ) ) beta_use = beta; + //else beta_use = &BLIS_ONE; + beta_use = &BLIS_ONE; // Extract the variant number and implementation type. n = cntl_var_num( cntl ); diff --git a/frame/2/gemv/bli_gemv_cntl.c b/frame/2/gemv/bli_gemv_cntl.c index eae6fba6a..81fa10f76 100644 --- a/frame/2/gemv/bli_gemv_cntl.c +++ b/frame/2/gemv/bli_gemv_cntl.c @@ -35,7 +35,7 @@ #include "blis.h" extern scalv_t* scalv_cntl; -extern packm_t* packm_cntl_noscale; +extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackv_t* unpackv_cntl; @@ -107,23 +107,23 @@ void bli_gemv_cntl_init() bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemv_nc, - scalv_cntl, // scale y up-front - packm_cntl_noscale, // pack A1 (if needed) - packv_cntl, // pack x1 (if needed) - NULL, // y is not partitioned in var2 + scalv_cntl, // scale y up-front + packm_cntl, // pack A1 (if needed) + packv_cntl, // pack x1 (if needed) + NULL, // y is not partitioned in var2 gemv_cntl_bs_ke_dot, - NULL ); // y is not partitioned in var2 + NULL ); // y is not partitioned in var2 gemv_cntl_rp_bs_axpy = bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemv_nc, - scalv_cntl, // scale y up-front - packm_cntl_noscale, // pack A1 (if needed) - packv_cntl, // pack x1 (if needed) - NULL, // y is not partitioned in var2 + scalv_cntl, // scale y up-front + packm_cntl, // pack A1 (if needed) + packv_cntl, // pack x1 (if needed) + NULL, // y is not partitioned in var2 gemv_cntl_bs_ke_axpy, - NULL ); // y is not partitioned in var2 + NULL ); // y is not partitioned in var2 // Create control trees for problems with relatively small n dimension @@ -133,23 +133,23 @@ void bli_gemv_cntl_init() bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemv_mc, - NULL, // no scaling in blk_var1 - packm_cntl_noscale, // pack A1 (if needed) - NULL, // x is not partitioned in var1 - packv_cntl, // pack y1 (if needed) + NULL, // no scaling in blk_var1 + packm_cntl, // pack A1 (if needed) + NULL, // x is not partitioned in var1 + packv_cntl, // pack y1 (if needed) gemv_cntl_bs_ke_dot, - unpackv_cntl ); // unpack y1 (if packed) + unpackv_cntl ); // unpack y1 (if packed) gemv_cntl_cp_bs_axpy = bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemv_mc, - NULL, // no scaling in blk_var1 - packm_cntl_noscale, // pack A1 (if needed) - NULL, // x is not partitioned in var1 - packv_cntl, // pack y1 (if needed) + NULL, // no scaling in blk_var1 + packm_cntl, // pack A1 (if needed) + NULL, // x is not partitioned in var1 + packv_cntl, // pack y1 (if needed) gemv_cntl_bs_ke_axpy, - unpackv_cntl ); // unpack y1 (if packed) + unpackv_cntl ); // unpack y1 (if packed) // Create control trees for generally large problems. Here, we choose a @@ -159,23 +159,23 @@ void bli_gemv_cntl_init() bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemv_mc, - NULL, // no scaling in blk_var1 - NULL, // do not pack A1 - NULL, // x is not partitioned in var1 - packv_cntl, // pack y1 (if needed) + NULL, // no scaling in blk_var1 + NULL, // do not pack A1 + NULL, // x is not partitioned in var1 + packv_cntl, // pack y1 (if needed) gemv_cntl_rp_bs_dot, - unpackv_cntl ); // unpack y1 (if packed) + unpackv_cntl ); // unpack y1 (if packed) gemv_cntl_ge_axpy = bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemv_mc, - NULL, // no scaling in blk_var1 - NULL, // do not pack A1 - NULL, // x is not partitioned in var1 - packv_cntl, // pack y1 (if needed) + NULL, // no scaling in blk_var1 + NULL, // do not pack A1 + NULL, // x is not partitioned in var1 + packv_cntl, // pack y1 (if needed) gemv_cntl_rp_bs_axpy, - unpackv_cntl ); // unpack y1 (if packed) + unpackv_cntl ); // unpack y1 (if packed) } void bli_gemv_cntl_finalize() diff --git a/frame/2/ger/bli_ger_cntl.c b/frame/2/ger/bli_ger_cntl.c index 98652a35e..516c422d8 100644 --- a/frame/2/ger/bli_ger_cntl.c +++ b/frame/2/ger/bli_ger_cntl.c @@ -34,7 +34,7 @@ #include "blis.h" -extern packm_t* packm_cntl_noscale; +extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackm_t* unpackm_cntl; @@ -104,21 +104,21 @@ void bli_ger_cntl_init() bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, ger_nc, - NULL, // x is not partitioned in var2 - packv_cntl, // pack y1 (if needed) - packm_cntl_noscale, // pack A1 (if needed) + NULL, // x is not partitioned in var2 + packv_cntl, // pack y1 (if needed) + packm_cntl, // pack A1 (if needed) ger_cntl_bs_ke_row, - unpackm_cntl ); // unpack A1 (if packed) + unpackm_cntl ); // unpack A1 (if packed) ger_cntl_rp_bs_col = bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, ger_nc, - NULL, // x is not partitioned in var2 - packv_cntl, // pack y1 (if needed) - packm_cntl_noscale, // pack A1 (if needed) + NULL, // x is not partitioned in var2 + packv_cntl, // pack y1 (if needed) + packm_cntl, // pack A1 (if needed) ger_cntl_bs_ke_col, - unpackm_cntl ); // unpack A1 (if packed) + unpackm_cntl ); // unpack A1 (if packed) // Create control trees for problems with relatively small n dimension @@ -128,21 +128,21 @@ void bli_ger_cntl_init() bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, ger_mc, - packv_cntl, // pack x1 (if needed) - NULL, // y is not partitioned in var1 - packm_cntl_noscale, // pack A1 (if needed) + packv_cntl, // pack x1 (if needed) + NULL, // y is not partitioned in var1 + packm_cntl, // pack A1 (if needed) ger_cntl_bs_ke_row, - unpackm_cntl ); // unpack A1 (if packed) + unpackm_cntl ); // unpack A1 (if packed) ger_cntl_cp_bs_col = bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, ger_mc, - packv_cntl, // pack x1 (if needed) - NULL, // y is not partitioned in var1 - packm_cntl_noscale, // pack A1 (if needed) + packv_cntl, // pack x1 (if needed) + NULL, // y is not partitioned in var1 + packm_cntl, // pack A1 (if needed) ger_cntl_bs_ke_col, - unpackm_cntl ); // unpack A1 (if packed) + unpackm_cntl ); // unpack A1 (if packed) // Create control trees for generally large problems. Here, we choose a @@ -152,21 +152,21 @@ void bli_ger_cntl_init() bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, ger_nc, - NULL, // x is not partitioned in var2 - packv_cntl, // pack y1 (if needed) - NULL, // do not pack A1 + NULL, // x is not partitioned in var2 + packv_cntl, // pack y1 (if needed) + NULL, // do not pack A1 ger_cntl_cp_bs_row, - NULL ); // do not unpack A1 + NULL ); // do not unpack A1 ger_cntl_ge_col = bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, ger_nc, - NULL, // x is not partitioned in var2 - packv_cntl, // pack y1 (if needed) - NULL, // do not pack A1 + NULL, // x is not partitioned in var2 + packv_cntl, // pack y1 (if needed) + NULL, // do not pack A1 ger_cntl_cp_bs_col, - NULL ); // do not unpack A1 + NULL ); // do not unpack A1 } void bli_ger_cntl_finalize() diff --git a/frame/2/hemv/bli_hemv_cntl.c b/frame/2/hemv/bli_hemv_cntl.c index a59308126..9c2846ad5 100644 --- a/frame/2/hemv/bli_hemv_cntl.c +++ b/frame/2/hemv/bli_hemv_cntl.c @@ -35,7 +35,7 @@ #include "blis.h" extern scalv_t* scalv_cntl; -extern packm_t* packm_cntl_noscale; +extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackv_t* unpackv_cntl; @@ -95,7 +95,7 @@ void bli_hemv_cntl_init() BLIS_VARIANT2, hemv_mc, scalv_cntl, // scale y up-front - packm_cntl_noscale, // pack A11 (if needed) + packm_cntl, // pack A11 (if needed) packv_cntl, // pack x1 (if needed) packv_cntl, // pack y1 (if needed) gemv_cntl_rp_bs_dot, // gemv_n_rp needed by var2 @@ -110,7 +110,7 @@ void bli_hemv_cntl_init() BLIS_VARIANT2, hemv_mc, scalv_cntl, // scale y up-front - packm_cntl_noscale, // pack A11 (if needed) + packm_cntl, // pack A11 (if needed) packv_cntl, // pack x1 (if needed) packv_cntl, // pack y1 (if needed) gemv_cntl_rp_bs_axpy, // gemv_n_rp needed by var2 diff --git a/frame/2/her/bli_her_cntl.c b/frame/2/her/bli_her_cntl.c index c3e94d993..febcac68c 100644 --- a/frame/2/her/bli_her_cntl.c +++ b/frame/2/her/bli_her_cntl.c @@ -34,7 +34,7 @@ #include "blis.h" -extern packm_t* packm_cntl_noscale; +extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackm_t* unpackm_cntl; @@ -93,21 +93,21 @@ void bli_her_cntl_init() bli_her_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, her_mc, - packv_cntl, // pack x1 (if needed) - NULL, // do NOT pack C11 + packv_cntl, // pack x1 (if needed) + NULL, // do NOT pack C11 ger_cntl_rp_bs_row, her_cntl_bs_ke_lrow_ucol, - NULL ); // no unpacking needed + NULL ); // no unpacking needed her_cntl_ge_lcol_urow = bli_her_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, her_mc, - packv_cntl, // pack x1 (if needed) - NULL, // do NOT pack C11 + packv_cntl, // pack x1 (if needed) + NULL, // do NOT pack C11 ger_cntl_cp_bs_col, her_cntl_bs_ke_lcol_urow, - NULL ); // no unpacking needed + NULL ); // no unpacking needed } void bli_her_cntl_finalize() diff --git a/frame/2/her2/bli_her2_cntl.c b/frame/2/her2/bli_her2_cntl.c index 117c77d18..52b17947a 100644 --- a/frame/2/her2/bli_her2_cntl.c +++ b/frame/2/her2/bli_her2_cntl.c @@ -34,7 +34,7 @@ #include "blis.h" -extern packm_t* packm_cntl_noscale; +extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackm_t* unpackm_cntl; @@ -93,25 +93,25 @@ void bli_her2_cntl_init() bli_her2_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, her2_mc, - packv_cntl, // pack x1 (if needed) - packv_cntl, // pack y1 (if needed) - packm_cntl_noscale, // pack C11 (if needed) + packv_cntl, // pack x1 (if needed) + packv_cntl, // pack y1 (if needed) + packm_cntl, // pack C11 (if needed) ger_cntl_rp_bs_row, ger_cntl_rp_bs_row, her2_cntl_bs_ke_lrow_ucol, - unpackm_cntl ); // unpack C11 (if packed) + unpackm_cntl ); // unpack C11 (if packed) her2_cntl_ge_lcol_urow = bli_her2_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT4, her2_mc, - packv_cntl, // pack x1 (if needed) - packv_cntl, // pack y1 (if needed) - packm_cntl_noscale, // pack C11 (if needed) + packv_cntl, // pack x1 (if needed) + packv_cntl, // pack y1 (if needed) + packm_cntl, // pack C11 (if needed) ger_cntl_cp_bs_col, ger_cntl_cp_bs_col, her2_cntl_bs_ke_lcol_urow, - unpackm_cntl ); // unpack C11 (if packed) + unpackm_cntl ); // unpack C11 (if packed) } void bli_her2_cntl_finalize() diff --git a/frame/2/trmv/bli_trmv_cntl.c b/frame/2/trmv/bli_trmv_cntl.c index 0d6b2f1ea..e80ddd8ce 100644 --- a/frame/2/trmv/bli_trmv_cntl.c +++ b/frame/2/trmv/bli_trmv_cntl.c @@ -34,7 +34,7 @@ #include "blis.h" -extern packm_t* packm_cntl_noscale; +extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackv_t* unpackv_cntl; @@ -92,25 +92,25 @@ void bli_trmv_cntl_init() trmv_cntl_ge_nrow_tcol = bli_trmv_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT1, // use var1 to maximize x1 usage + BLIS_VARIANT1, // use var1 to maximize x1 usage trmv_mc, - packm_cntl_noscale, // pack A11 (if needed) - packv_cntl, // pack x1 (if needed) - gemv_cntl_rp_bs_dot, // gemv_rp needed by var1 - NULL, // gemv_cp not needed by var1 + packm_cntl, // pack A11 (if needed) + packv_cntl, // pack x1 (if needed) + gemv_cntl_rp_bs_dot, // gemv_rp needed by var1 + NULL, // gemv_cp not needed by var1 trmv_cntl_bs_ke_nrow_tcol, - unpackv_cntl ); // unpack x1 (if packed) + unpackv_cntl ); // unpack x1 (if packed) trmv_cntl_ge_ncol_trow = bli_trmv_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT1, // use var1 to maximize x1 usage + BLIS_VARIANT1, // use var1 to maximize x1 usage trmv_mc, - packm_cntl_noscale, // pack A11 (if needed) - packv_cntl, // pack x1 (if needed) - gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1 - NULL, // gemv_cp not needed by var1 + packm_cntl, // pack A11 (if needed) + packv_cntl, // pack x1 (if needed) + gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1 + NULL, // gemv_cp not needed by var1 trmv_cntl_bs_ke_ncol_trow, - unpackv_cntl ); // unpack x1 (if packed) + unpackv_cntl ); // unpack x1 (if packed) } void bli_trmv_cntl_finalize() diff --git a/frame/2/trsv/bli_trsv_cntl.c b/frame/2/trsv/bli_trsv_cntl.c index b62a57a83..630e61c5d 100644 --- a/frame/2/trsv/bli_trsv_cntl.c +++ b/frame/2/trsv/bli_trsv_cntl.c @@ -35,7 +35,7 @@ #include "blis.h" extern scalv_t* scalv_cntl; -extern packm_t* packm_cntl_noscale; +extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackv_t* unpackv_cntl; @@ -89,27 +89,27 @@ void bli_trsv_cntl_init() trsv_cntl_ge_nrow_tcol = bli_trsv_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT1, // use var1 to maximize x1 usage + BLIS_VARIANT1, // use var1 to maximize x1 usage trsv_mc, - scalv_cntl, // scale x up-front - packm_cntl_noscale, // pack A11 (if needed) - packv_cntl, // pack x1 (if needed) - gemv_cntl_rp_bs_dot, // gemv_rp needed by var1 - NULL, // gemv_cp not needed by var1 + scalv_cntl, // scale x up-front + packm_cntl, // pack A11 (if needed) + packv_cntl, // pack x1 (if needed) + gemv_cntl_rp_bs_dot, // gemv_rp needed by var1 + NULL, // gemv_cp not needed by var1 trsv_cntl_bs_ke_nrow_tcol, - unpackv_cntl ); // unpack x1 (if needed) + unpackv_cntl ); // unpack x1 (if needed) trsv_cntl_ge_ncol_trow = bli_trsv_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT1, // use var1 to maximize x1 usage + BLIS_VARIANT1, // use var1 to maximize x1 usage trsv_mc, - scalv_cntl, // scale x up-front - packm_cntl_noscale, // pack A11 (if needed) - packv_cntl, // pack x1 (if needed) - gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1 - NULL, // gemv_cp not needed by var1 + scalv_cntl, // scale x up-front + packm_cntl, // pack A11 (if needed) + packv_cntl, // pack x1 (if needed) + gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1 + NULL, // gemv_cp not needed by var1 trsv_cntl_bs_ke_ncol_trow, - unpackv_cntl ); // unpack x1 (if needed) + unpackv_cntl ); // unpack x1 (if needed) } void bli_trsv_cntl_finalize() diff --git a/frame/3/gemm/bli_gemm_cntl.c b/frame/3/gemm/bli_gemm_cntl.c index 1074bb0c5..d7afeff82 100644 --- a/frame/3/gemm/bli_gemm_cntl.c +++ b/frame/3/gemm/bli_gemm_cntl.c @@ -109,7 +109,6 @@ void bli_gemm_cntl_init() BLIS_VARIANT2, gemm_mr, gemm_kr, - FALSE, // do NOT scale by alpha FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -123,7 +122,6 @@ void bli_gemm_cntl_init() BLIS_VARIANT2, gemm_kr, gemm_nr, - FALSE, // do NOT scale by alpha FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -138,7 +136,6 @@ void bli_gemm_cntl_init() BLIS_VARIANT1, gemm_mr, gemm_nr, - FALSE, // do NOT scale by beta FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? diff --git a/frame/3/hemm/bli_hemm_cntl.c b/frame/3/hemm/bli_hemm_cntl.c index cedb848e3..7d0975751 100644 --- a/frame/3/hemm/bli_hemm_cntl.c +++ b/frame/3/hemm/bli_hemm_cntl.c @@ -103,7 +103,6 @@ void bli_hemm_cntl_init() BLIS_VARIANT2, hemm_mr, hemm_kr, - FALSE, // do NOT scale by alpha TRUE, // densify FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -117,9 +116,7 @@ void bli_hemm_cntl_init() BLIS_VARIANT2, hemm_kr, hemm_nr, - FALSE, // do NOT scale by alpha - //FALSE, // already dense; densify not necessary - TRUE, // densify (if needed) + TRUE, // densify FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? @@ -133,7 +130,6 @@ void bli_hemm_cntl_init() BLIS_VARIANT1, hemm_mr, hemm_nr, - FALSE, // do NOT scale by beta FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? diff --git a/frame/3/her2k/bli_her2k_cntl.c b/frame/3/her2k/bli_her2k_cntl.c index 2ef5c9629..9a68334d9 100644 --- a/frame/3/her2k/bli_her2k_cntl.c +++ b/frame/3/her2k/bli_her2k_cntl.c @@ -104,7 +104,6 @@ void bli_her2k_cntl_init() BLIS_VARIANT2, her2k_mr, her2k_kr, - FALSE, // do NOT scale by alpha FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -118,7 +117,6 @@ void bli_her2k_cntl_init() BLIS_VARIANT2, her2k_kr, her2k_nr, - FALSE, // do NOT scale by alpha FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -133,7 +131,6 @@ void bli_her2k_cntl_init() BLIS_VARIANT1, her2k_mr, her2k_nr, - FALSE, // do NOT scale by beta FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? diff --git a/frame/3/herk/bli_herk_cntl.c b/frame/3/herk/bli_herk_cntl.c index e515f4c8c..e31c44ee1 100644 --- a/frame/3/herk/bli_herk_cntl.c +++ b/frame/3/herk/bli_herk_cntl.c @@ -103,7 +103,6 @@ void bli_herk_cntl_init() BLIS_VARIANT2, herk_mr, herk_kr, - FALSE, // do NOT scale by alpha FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -117,7 +116,6 @@ void bli_herk_cntl_init() BLIS_VARIANT2, herk_kr, herk_nr, - FALSE, // do NOT scale by alpha FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -132,7 +130,6 @@ void bli_herk_cntl_init() BLIS_VARIANT1, herk_mr, herk_nr, - FALSE, // do NOT scale by beta FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? diff --git a/frame/3/trmm/bli_trmm_cntl.c b/frame/3/trmm/bli_trmm_cntl.c index a990c940e..7dbc985f5 100644 --- a/frame/3/trmm/bli_trmm_cntl.c +++ b/frame/3/trmm/bli_trmm_cntl.c @@ -116,7 +116,6 @@ void bli_trmm_cntl_init() // multiple is set to mr. trmm_mr, trmm_mr, - FALSE, // do NOT scale by alpha TRUE, // densify FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -132,7 +131,6 @@ void bli_trmm_cntl_init() // since "k" dim multiple is set to mr above. trmm_mr, trmm_nr, - FALSE, // do NOT scale by alpha FALSE, // already dense FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -149,7 +147,6 @@ void bli_trmm_cntl_init() // multiple is set to nr. trmm_mr, trmm_nr, - FALSE, // do NOT scale by alpha FALSE, // already dense FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -165,7 +162,6 @@ void bli_trmm_cntl_init() // since "k" dim multiple is set to nr above. trmm_nr, trmm_nr, - FALSE, // do NOT scale by alpha TRUE, // densify FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -180,7 +176,6 @@ void bli_trmm_cntl_init() BLIS_VARIANT1, trmm_mr, trmm_nr, - FALSE, // do NOT scale by beta FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? diff --git a/frame/3/trmm3/bli_trmm3.c b/frame/3/trmm3/bli_trmm3.c index 73a3795a7..1baa4fb43 100644 --- a/frame/3/trmm3/bli_trmm3.c +++ b/frame/3/trmm3/bli_trmm3.c @@ -34,7 +34,8 @@ #include "blis.h" -extern trmm_t* trmm3_cntl; +extern trmm_t* trmm_l_cntl; +extern trmm_t* trmm_r_cntl; // // Define object-based interface. @@ -133,7 +134,8 @@ void bli_trmm3( side_t side, &beta_local ); // Choose the control tree. - cntl = trmm3_cntl; + if ( bli_is_left( side ) ) cntl = trmm_l_cntl; + else cntl = trmm_r_cntl; // Invoke the internal back-end. bli_trmm_int( &alpha_local, diff --git a/frame/3/trmm3/bli_trmm3.h b/frame/3/trmm3/bli_trmm3.h index c3c4aa1a7..e4094265b 100644 --- a/frame/3/trmm3/bli_trmm3.h +++ b/frame/3/trmm3/bli_trmm3.h @@ -32,7 +32,6 @@ */ -#include "bli_trmm3_cntl.h" #include "bli_trmm3_check.h" diff --git a/frame/3/trmm3/bli_trmm3_cntl.c b/frame/3/trmm3/bli_trmm3_cntl.c deleted file mode 100644 index 1be47a601..000000000 --- a/frame/3/trmm3/bli_trmm3_cntl.c +++ /dev/null @@ -1,236 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2013, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern scalm_t* scalm_cntl; -extern gemm_t* gemm_cntl_bp_ke; - -trmm_t* trmm3_cntl; - -trmm_t* trmm3_cntl_bp_ke; -trmm_t* trmm3_cntl_op_bp; -trmm_t* trmm3_cntl_mm_op; -trmm_t* trmm3_cntl_vl_mm; - -packm_t* trmm3_packa_cntl; -packm_t* trmm3_packb_cntl; -packm_t* trmm3_packc_cntl; -unpackm_t* trmm3_unpackc_cntl; - -blksz_t* trmm3_mc; -blksz_t* trmm3_nc; -blksz_t* trmm3_kc; -blksz_t* trmm3_mr; -blksz_t* trmm3_nr; -blksz_t* trmm3_kr; -blksz_t* trmm3_ni; - - -void bli_trmm3_cntl_init() -{ - // Create blocksize objects for each dimension. - trmm3_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S, - BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D, - BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C, - BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z ); - - trmm3_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S, - BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D, - BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C, - BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z ); - - trmm3_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S, - BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D, - BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C, - BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z ); - - trmm3_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S, - BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D, - BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C, - BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z ); - - trmm3_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S, - BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D, - BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C, - BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z ); - - trmm3_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S, - BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D, - BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C, - BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z ); - - trmm3_ni = bli_blksz_obj_create( BLIS_DEFAULT_NI_S, 0, - BLIS_DEFAULT_NI_D, 0, - BLIS_DEFAULT_NI_C, 0, - BLIS_DEFAULT_NI_Z, 0 ); - - - // Create control tree objects for packm operations on a, b, and c. - trmm3_packa_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, // pack panels of A compactly - // IMPORTANT: for consistency with trsm, "k" dim - // multiple is set to mr. - trmm3_mr, - trmm3_mr, - FALSE, // do NOT scale by alpha - TRUE, // densify - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS, - BLIS_BUFFER_FOR_A_BLOCK ); - - trmm3_packb_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - // IMPORTANT: m dim multiple here must be mr - // since "k" dim multiple is set to mr above. - trmm3_mr, - trmm3_nr, - FALSE, // do NOT scale by alpha - FALSE, // already dense; densify not necessary - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS, - BLIS_BUFFER_FOR_B_PANEL ); - - trmm3_packc_cntl - = - bli_packm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - trmm3_mr, - trmm3_nr, - FALSE, // do NOT scale by beta - FALSE, // already dense; densify not necessary - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COLUMNS, - BLIS_BUFFER_FOR_GEN_USE ); - - trmm3_unpackc_cntl - = - bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - NULL ); // no blocksize needed - - - // Create control tree object for lowest-level block-panel kernel. - trmm3_cntl_bp_ke - = - bli_trmm_cntl_obj_create( BLIS_UNB_OPT, - BLIS_VARIANT2, - NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL ); - - // Create control tree object for outer panel (to block-panel) - // problem, packing a and b. - trmm3_cntl_op_bp - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - //BLIS_VARIANT4, // var1 with incremental pack in iter 0 - BLIS_VARIANT1, - trmm3_mc, - trmm3_ni, - NULL, - trmm3_packa_cntl, - trmm3_packb_cntl, - NULL, - trmm3_cntl_bp_ke, - gemm_cntl_bp_ke, - NULL ); - - // Create control tree object for general problem via multiple - // rank-k (outer panel) updates, packing a and b. - trmm3_cntl_mm_op - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, - trmm3_kc, - NULL, - NULL, - NULL, - NULL, - NULL, - trmm3_cntl_op_bp, - NULL, - NULL ); - - // Create control tree object for very large problem via multiple - // general problems, packing a and b. - trmm3_cntl_vl_mm - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - trmm3_nc, - NULL, - NULL, - NULL, - NULL, - NULL, - trmm3_cntl_mm_op, - NULL, - NULL ); - - // Alias the "master" trmm3 control tree to a shorter name. - //trmm3_cntl = trmm3_cntl_mm_op; - trmm3_cntl = trmm3_cntl_vl_mm; -} - -void bli_trmm3_cntl_finalize() -{ - bli_blksz_obj_free( trmm3_mc ); - bli_blksz_obj_free( trmm3_nc ); - bli_blksz_obj_free( trmm3_kc ); - bli_blksz_obj_free( trmm3_mr ); - bli_blksz_obj_free( trmm3_nr ); - bli_blksz_obj_free( trmm3_kr ); - bli_blksz_obj_free( trmm3_ni ); - - bli_cntl_obj_free( trmm3_packa_cntl ); - bli_cntl_obj_free( trmm3_packb_cntl ); - bli_cntl_obj_free( trmm3_packc_cntl ); - bli_cntl_obj_free( trmm3_unpackc_cntl ); - - bli_cntl_obj_free( trmm3_cntl_bp_ke ); - bli_cntl_obj_free( trmm3_cntl_op_bp ); - bli_cntl_obj_free( trmm3_cntl_mm_op ); - bli_cntl_obj_free( trmm3_cntl_vl_mm ); -} - diff --git a/frame/3/trmm3/bli_trmm3_cntl.h b/frame/3/trmm3/bli_trmm3_cntl.h deleted file mode 100644 index 904528a7a..000000000 --- a/frame/3/trmm3/bli_trmm3_cntl.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2013, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_trmm3_cntl_init( void ); -void bli_trmm3_cntl_finalize( void ); - diff --git a/frame/3/trsm/bli_trsm_cntl.c b/frame/3/trsm/bli_trsm_cntl.c index b8ab1d2cd..9b49919d1 100644 --- a/frame/3/trsm/bli_trsm_cntl.c +++ b/frame/3/trsm/bli_trsm_cntl.c @@ -116,7 +116,6 @@ void bli_trsm_cntl_init() // support right and bottom-right edge cases trsm_mr, trsm_mr, - FALSE, // do NOT scale by alpha TRUE, // densify TRUE, // invert diagonal TRUE, // reverse iteration if upper? @@ -132,7 +131,6 @@ void bli_trsm_cntl_init() // B_pack is updated (ie: serves as C) in trsm trsm_mr, trsm_nr, - FALSE, // do NOT scale by alpha FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -147,7 +145,6 @@ void bli_trsm_cntl_init() BLIS_VARIANT2, trsm_nr, trsm_mr, - FALSE, // do NOT scale by alpha FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -161,7 +158,6 @@ void bli_trsm_cntl_init() BLIS_VARIANT3, // pack panels of B compactly trsm_mr, trsm_mr, - FALSE, // do NOT scale by alpha TRUE, // densify TRUE, // invert diagonal FALSE, // reverse iteration if upper? @@ -176,7 +172,6 @@ void bli_trsm_cntl_init() BLIS_VARIANT1, trsm_mr, trsm_nr, - FALSE, // do NOT scale by beta FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? diff --git a/frame/cntl/bli_cntl_init.c b/frame/cntl/bli_cntl_init.c index f2945b9ff..227d7ff01 100644 --- a/frame/cntl/bli_cntl_init.c +++ b/frame/cntl/bli_cntl_init.c @@ -61,7 +61,6 @@ void bli_cntl_init( void ) bli_herk_cntl_init(); bli_her2k_cntl_init(); bli_trmm_cntl_init(); - bli_trmm3_cntl_init(); bli_trsm_cntl_init(); } @@ -92,7 +91,6 @@ void bli_cntl_finalize( void ) bli_herk_cntl_finalize(); bli_her2k_cntl_finalize(); bli_trmm_cntl_finalize(); - bli_trmm3_cntl_finalize(); bli_trsm_cntl_finalize(); } diff --git a/frame/include/bli_obj_macro_defs.h b/frame/include/bli_obj_macro_defs.h index 4381d3da5..fdfbc7ad7 100644 --- a/frame/include/bli_obj_macro_defs.h +++ b/frame/include/bli_obj_macro_defs.h @@ -799,11 +799,19 @@ bli_obj_width_stored( obj ) // -- Miscellaneous object macros -- -// Make an alias (shallow copy) +// Make a special alias (shallow copy) that does not overwrite pack_mem +// entry. + +#define bli_obj_alias_for_packing( a, b ) \ +{ \ + bli_obj_init_basic_shallow_copy_of( a, b ); \ +} + +// Make a full alias (shallow copy), including pack_mem and friends #define bli_obj_alias_to( a, b ) \ { \ - bli_obj_init_as_copy_of( a, b ); \ + bli_obj_init_full_shallow_copy_of( a, b ); \ } // Check if two objects are aliases of one another @@ -844,10 +852,8 @@ bli_obj_width_stored( obj ) #define bli_obj_init_pack( obj_p ) \ { \ mem_t* pack_mem = bli_obj_pack_mem( *obj_p ); \ - /*mem_t* cast_mem = bli_obj_cast_mem( *obj_p );*/ \ \ bli_mem_set_buffer( NULL, pack_mem ); \ - /*bli_mem_set_buffer( NULL, cast_mem );*/ \ } @@ -868,12 +874,6 @@ bli_obj_width_stored( obj ) mem_t* pack_mem = bli_obj_pack_mem( *(obj_p) ); \ if ( bli_mem_is_alloc( pack_mem ) ) \ bli_mem_release( pack_mem ); \ -\ -/* - mem_t* cast_mem = bli_obj_cast_mem( *(obj_p) ); \ - if ( bli_mem_is_alloc( cast_mem ) ) \ - bli_mem_release( cast_mem ); \ -*/ \ } diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 89f4908a6..9bc3318d9 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -480,15 +480,12 @@ typedef struct obj_s inc_t ps; // panel stride (distance to next panel) inc_t pd; // panel dimension (the "width" of a panel: // usually MR or NR) - - //mem_t cast_mem; // cached memory region for casting - } obj_t; // Define these macros here since they must be updated if contents of // obj_t changes. -#define bli_obj_init_as_copy_of( a, b ) \ +#define bli_obj_init_basic_shallow_copy_of( a, b ) \ { \ (b).root = (a).root; \ \ @@ -504,12 +501,28 @@ typedef struct obj_s (b).buffer = (a).buffer; \ (b).rs = (a).rs; \ (b).cs = (a).cs; \ +\ + (b).scalar = (a).scalar; \ \ /* We must NOT copy pack_mem field since this macro forms the basis of bli_obj_alias_to(), which is used in packm_init(). There, we want to copy the basic fields of the obj_t but PRESERVE the pack_mem field - (and the corresponding dimensions and stride) of the destination - object since it holds the cached mem_t object and buffer. */ \ + of the destination object since it holds the "cached" mem_t object + and buffer. The other fields, such as padded dimensions, are always + set by bli_packm_init(), so we don't need to copy them either. */ \ +} + +#define bli_obj_init_full_shallow_copy_of( a, b ) \ +{ \ + /* This macro implements a full alias (shallow copy) that copies all + fields of the obj_t struct. */ \ + bli_obj_init_basic_shallow_copy_of( a, b ); \ +\ + (b).pack_mem = (a).pack_mem; \ + (b).m_padded = (a).m_padded; \ + (b).n_padded = (a).n_padded; \ + (b).ps = (a).ps; \ + (b).pd = (a).pd; \ } #define bli_obj_init_subpart_from( a, b ) \ @@ -518,8 +531,8 @@ typedef struct obj_s \ (b).offm = (a).offm; \ (b).offn = (a).offn; \ -\ -\ + /* Avoid copying m since it will be overwritten. */ \ + /* Avoid copying n since it will be overwritten. */ \ (b).diag_off = (a).diag_off; \ \ (b).info = (a).info; \ @@ -528,18 +541,19 @@ typedef struct obj_s (b).buffer = (a).buffer; \ (b).rs = (a).rs; \ (b).cs = (a).cs; \ +\ + (b).scalar = (a).scalar; \ \ /* We want to copy the pack_mem field here because this macro is used when creating subpartitions, including those of packed objects. In those situations, we want the subpartition to inherit the pack_mem - field, and the corresponding packed dimensions, of its parent. */ \ + field of its parent, as well as other related fields such as the + padded dimensions. */ \ (b).pack_mem = (a).pack_mem; \ (b).m_padded = (a).m_padded; \ (b).n_padded = (a).n_padded; \ (b).pd = (a).pd; \ (b).ps = (a).ps; \ -\ - /*(b).cast_mem = (a).cast_mem;*/ \ }