Removed optional scaling from packm control tree.

Details:
- Removed does_scale field from packm control tree node and
  bli_packm_cntl_obj_create() interface. Adjusted all invocations of
  _cntl_obj_create() accordingly.
- Redefined/renamed macros that are used in aliasing so that now,
  bli_obj_alias_to() does a full alias (shallow copy) while
  bli_obj_alias_for_packing() does a partial alias that preserves the
  pack_mem-related fields of the aliasing (destination) object.
- Removed bli_trmm3_cntl.c, .h after realizing that the trmm control tree
  will work just fine for bli_trmm3().
- Removed some commented vestiges of the typecasting functionality needed
  to support heterogeneous datatypes.
This commit is contained in:
Field G. Van Zee
2013-11-24 11:40:31 -06:00
parent e65c476284
commit 9552e6ee82
24 changed files with 162 additions and 486 deletions

View File

@@ -34,18 +34,13 @@
#include "blis.h"
packm_t* packm_cntl_row_noscale;
packm_t* packm_cntl_row_scale;
packm_t* packm_cntl_col_noscale;
packm_t* packm_cntl_col_scale;
packm_t* packm_cntl_row;
packm_t* packm_cntl_col;
packm_t* packm_cntl_rpn_noscale;
packm_t* packm_cntl_rpn_scale;
packm_t* packm_cntl_cpn_noscale;
packm_t* packm_cntl_cpn_scale;
packm_t* packm_cntl_rpn;
packm_t* packm_cntl_cpn;
packm_t* packm_cntl_noscale;
packm_t* packm_cntl_scale;
packm_t* packm_cntl;
blksz_t* packm_mult_ldim;
blksz_t* packm_mult_nvec;
@@ -87,27 +82,13 @@ void bli_packm_cntl_init()
// with structure, though they can also be used on matrices that
// are already dense and/or have no structure.
// Create control trees to pack by rows (with and without scaling).
packm_cntl_row_noscale
// Create control trees to pack by rows.
packm_cntl_row
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1, // When packing to rows:
packm_mult_nvec, // - nvec multiple is used for m dimension
packm_mult_ldim, // - ldim multiple is used for n dimension
FALSE, // do NOT scale
FALSE, // do NOT densify structure
FALSE, // do NOT invert diagonal
FALSE, // do NOT iterate backwards if upper
FALSE, // do NOT iterate backwards if lower
BLIS_PACKED_ROWS,
BLIS_BUFFER_FOR_GEN_USE );
packm_cntl_row_scale
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1, // When packing to rows:
packm_mult_nvec, // - nvec multiple is used for m dimension
packm_mult_ldim, // - ldim multiple is used for n dimension
TRUE, // do scale
FALSE, // do NOT densify structure
FALSE, // do NOT invert diagonal
FALSE, // do NOT iterate backwards if upper
@@ -116,27 +97,13 @@ void bli_packm_cntl_init()
BLIS_BUFFER_FOR_GEN_USE );
// Create control trees to pack by columns (with and without scaling).
packm_cntl_col_noscale
// Create control trees to pack by columns.
packm_cntl_col
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1, // When packing to columns:
packm_mult_ldim, // - ldim multiple is used for m dimension
packm_mult_nvec, // - nvec multiple is used for n dimension
FALSE, // do NOT scale
FALSE, // do NOT densify structure
FALSE, // do NOT invert diagonal
FALSE, // do NOT iterate backwards if upper
FALSE, // do NOT iterate backwards if lower
BLIS_PACKED_COLUMNS,
BLIS_BUFFER_FOR_GEN_USE );
packm_cntl_col_scale
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1, // When packing to columns:
packm_mult_ldim, // - ldim multiple is used for m dimension
packm_mult_nvec, // - nvec multiple is used for n dimension
TRUE, // do scale
FALSE, // do NOT densify structure
FALSE, // do NOT invert diagonal
FALSE, // do NOT iterate backwards if upper
@@ -147,16 +114,13 @@ void bli_packm_cntl_init()
// Set defaults when we don't care whether the packing is by rows or
// by columns.
packm_cntl_noscale = packm_cntl_col_noscale;
packm_cntl_scale = packm_cntl_col_scale;
packm_cntl = packm_cntl_col;
}
void bli_packm_cntl_finalize()
{
bli_cntl_obj_free( packm_cntl_row_noscale );
bli_cntl_obj_free( packm_cntl_row_scale );
bli_cntl_obj_free( packm_cntl_col_noscale );
bli_cntl_obj_free( packm_cntl_col_scale );
bli_cntl_obj_free( packm_cntl_row );
bli_cntl_obj_free( packm_cntl_col );
bli_blksz_obj_free( packm_mult_ldim );
bli_blksz_obj_free( packm_mult_nvec );
@@ -166,7 +130,6 @@ packm_t* bli_packm_cntl_obj_create( impl_t impl_type,
varnum_t var_num,
blksz_t* mr,
blksz_t* nr,
bool_t does_scale,
bool_t does_densify,
bool_t does_invert_diag,
bool_t rev_iter_if_upper,
@@ -182,7 +145,6 @@ packm_t* bli_packm_cntl_obj_create( impl_t impl_type,
cntl->var_num = var_num;
cntl->mr = mr;
cntl->nr = nr;
cntl->does_scale = does_scale;
cntl->does_densify = does_densify;
cntl->does_invert_diag = does_invert_diag;
cntl->rev_iter_if_upper = rev_iter_if_upper;
@@ -198,7 +160,6 @@ void bli_packm_cntl_obj_init( packm_t* cntl,
varnum_t var_num,
blksz_t* mr,
blksz_t* nr,
bool_t does_scale,
bool_t does_densify,
bool_t does_invert_diag,
bool_t rev_iter_if_upper,
@@ -210,7 +171,6 @@ void bli_packm_cntl_obj_init( packm_t* cntl,
cntl->var_num = var_num;
cntl->mr = mr;
cntl->nr = nr;
cntl->does_scale = does_scale;
cntl->does_densify = does_densify;
cntl->does_invert_diag = does_invert_diag;
cntl->rev_iter_if_upper = rev_iter_if_upper;

View File

@@ -38,7 +38,6 @@ struct packm_s
varnum_t var_num;
blksz_t* mr;
blksz_t* nr;
bool_t does_scale;
bool_t does_densify;
bool_t does_invert_diag;
bool_t rev_iter_if_upper;
@@ -51,7 +50,6 @@ typedef struct packm_s packm_t;
#define cntl_mr( cntl ) cntl->mr
#define cntl_nr( cntl ) cntl->nr
#define cntl_does_scale( cntl ) cntl->does_scale
#define cntl_does_densify( cntl ) cntl->does_densify
#define cntl_does_invert_diag( cntl ) cntl->does_invert_diag
#define cntl_rev_iter_if_upper( cntl ) cntl->rev_iter_if_upper
@@ -73,7 +71,6 @@ packm_t* bli_packm_cntl_obj_create( impl_t impl_type,
varnum_t var_num,
blksz_t* mr_def,
blksz_t* nr_def,
bool_t does_scale,
bool_t does_densify,
bool_t does_invert_diag,
bool_t rev_iter_if_upper,
@@ -85,7 +82,6 @@ void bli_packm_cntl_obj_init( packm_t* cntl,
varnum_t var_num,
blksz_t* mr_def,
blksz_t* nr_def,
bool_t does_scale,
bool_t does_densify,
bool_t does_invert_diag,
bool_t rev_iter_if_upper,

View File

@@ -185,8 +185,10 @@ void bli_packm_init_pack( bool_t densify,
void* buf;
// We begin by copying the basic fields of c.
bli_obj_alias_to( *c, *p );
// We begin by copying the basic fields of c. We do NOT copy the
// pack_mem entry from c because the entry in p may be cached from
// a previous iteration, and thus we don't want to overwrite it.
bli_obj_alias_for_packing( *c, *p );
// Update the dimension fields to explicitly reflect a transposition,
// if needed.

View File

@@ -110,8 +110,9 @@ void bli_packm_int( obj_t* beta,
// to be non-unit even when no scaling is prescribed. If the control tree
// indicates no scaling, then make sure that BLIS_ONE is passed into the
// packm implementation.
if ( cntl_does_scale( cntl ) ) beta_use = beta;
else beta_use = &BLIS_ONE;
//if ( cntl_does_scale( cntl ) ) beta_use = beta;
//else beta_use = &BLIS_ONE;
beta_use = &BLIS_ONE;
// Extract the variant number and implementation type.
n = cntl_var_num( cntl );

View File

@@ -35,7 +35,7 @@
#include "blis.h"
extern scalv_t* scalv_cntl;
extern packm_t* packm_cntl_noscale;
extern packm_t* packm_cntl;
extern packv_t* packv_cntl;
extern unpackv_t* unpackv_cntl;
@@ -107,23 +107,23 @@ void bli_gemv_cntl_init()
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemv_nc,
scalv_cntl, // scale y up-front
packm_cntl_noscale, // pack A1 (if needed)
packv_cntl, // pack x1 (if needed)
NULL, // y is not partitioned in var2
scalv_cntl, // scale y up-front
packm_cntl, // pack A1 (if needed)
packv_cntl, // pack x1 (if needed)
NULL, // y is not partitioned in var2
gemv_cntl_bs_ke_dot,
NULL ); // y is not partitioned in var2
NULL ); // y is not partitioned in var2
gemv_cntl_rp_bs_axpy
=
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemv_nc,
scalv_cntl, // scale y up-front
packm_cntl_noscale, // pack A1 (if needed)
packv_cntl, // pack x1 (if needed)
NULL, // y is not partitioned in var2
scalv_cntl, // scale y up-front
packm_cntl, // pack A1 (if needed)
packv_cntl, // pack x1 (if needed)
NULL, // y is not partitioned in var2
gemv_cntl_bs_ke_axpy,
NULL ); // y is not partitioned in var2
NULL ); // y is not partitioned in var2
// Create control trees for problems with relatively small n dimension
@@ -133,23 +133,23 @@ void bli_gemv_cntl_init()
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
gemv_mc,
NULL, // no scaling in blk_var1
packm_cntl_noscale, // pack A1 (if needed)
NULL, // x is not partitioned in var1
packv_cntl, // pack y1 (if needed)
NULL, // no scaling in blk_var1
packm_cntl, // pack A1 (if needed)
NULL, // x is not partitioned in var1
packv_cntl, // pack y1 (if needed)
gemv_cntl_bs_ke_dot,
unpackv_cntl ); // unpack y1 (if packed)
unpackv_cntl ); // unpack y1 (if packed)
gemv_cntl_cp_bs_axpy
=
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
gemv_mc,
NULL, // no scaling in blk_var1
packm_cntl_noscale, // pack A1 (if needed)
NULL, // x is not partitioned in var1
packv_cntl, // pack y1 (if needed)
NULL, // no scaling in blk_var1
packm_cntl, // pack A1 (if needed)
NULL, // x is not partitioned in var1
packv_cntl, // pack y1 (if needed)
gemv_cntl_bs_ke_axpy,
unpackv_cntl ); // unpack y1 (if packed)
unpackv_cntl ); // unpack y1 (if packed)
// Create control trees for generally large problems. Here, we choose a
@@ -159,23 +159,23 @@ void bli_gemv_cntl_init()
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
gemv_mc,
NULL, // no scaling in blk_var1
NULL, // do not pack A1
NULL, // x is not partitioned in var1
packv_cntl, // pack y1 (if needed)
NULL, // no scaling in blk_var1
NULL, // do not pack A1
NULL, // x is not partitioned in var1
packv_cntl, // pack y1 (if needed)
gemv_cntl_rp_bs_dot,
unpackv_cntl ); // unpack y1 (if packed)
unpackv_cntl ); // unpack y1 (if packed)
gemv_cntl_ge_axpy
=
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
gemv_mc,
NULL, // no scaling in blk_var1
NULL, // do not pack A1
NULL, // x is not partitioned in var1
packv_cntl, // pack y1 (if needed)
NULL, // no scaling in blk_var1
NULL, // do not pack A1
NULL, // x is not partitioned in var1
packv_cntl, // pack y1 (if needed)
gemv_cntl_rp_bs_axpy,
unpackv_cntl ); // unpack y1 (if packed)
unpackv_cntl ); // unpack y1 (if packed)
}
void bli_gemv_cntl_finalize()

View File

@@ -34,7 +34,7 @@
#include "blis.h"
extern packm_t* packm_cntl_noscale;
extern packm_t* packm_cntl;
extern packv_t* packv_cntl;
extern unpackm_t* unpackm_cntl;
@@ -104,21 +104,21 @@ void bli_ger_cntl_init()
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
ger_nc,
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
packm_cntl_noscale, // pack A1 (if needed)
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
packm_cntl, // pack A1 (if needed)
ger_cntl_bs_ke_row,
unpackm_cntl ); // unpack A1 (if packed)
unpackm_cntl ); // unpack A1 (if packed)
ger_cntl_rp_bs_col
=
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
ger_nc,
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
packm_cntl_noscale, // pack A1 (if needed)
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
packm_cntl, // pack A1 (if needed)
ger_cntl_bs_ke_col,
unpackm_cntl ); // unpack A1 (if packed)
unpackm_cntl ); // unpack A1 (if packed)
// Create control trees for problems with relatively small n dimension
@@ -128,21 +128,21 @@ void bli_ger_cntl_init()
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
ger_mc,
packv_cntl, // pack x1 (if needed)
NULL, // y is not partitioned in var1
packm_cntl_noscale, // pack A1 (if needed)
packv_cntl, // pack x1 (if needed)
NULL, // y is not partitioned in var1
packm_cntl, // pack A1 (if needed)
ger_cntl_bs_ke_row,
unpackm_cntl ); // unpack A1 (if packed)
unpackm_cntl ); // unpack A1 (if packed)
ger_cntl_cp_bs_col
=
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
ger_mc,
packv_cntl, // pack x1 (if needed)
NULL, // y is not partitioned in var1
packm_cntl_noscale, // pack A1 (if needed)
packv_cntl, // pack x1 (if needed)
NULL, // y is not partitioned in var1
packm_cntl, // pack A1 (if needed)
ger_cntl_bs_ke_col,
unpackm_cntl ); // unpack A1 (if packed)
unpackm_cntl ); // unpack A1 (if packed)
// Create control trees for generally large problems. Here, we choose a
@@ -152,21 +152,21 @@ void bli_ger_cntl_init()
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
ger_nc,
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
NULL, // do not pack A1
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
NULL, // do not pack A1
ger_cntl_cp_bs_row,
NULL ); // do not unpack A1
NULL ); // do not unpack A1
ger_cntl_ge_col
=
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
ger_nc,
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
NULL, // do not pack A1
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
NULL, // do not pack A1
ger_cntl_cp_bs_col,
NULL ); // do not unpack A1
NULL ); // do not unpack A1
}
void bli_ger_cntl_finalize()

View File

@@ -35,7 +35,7 @@
#include "blis.h"
extern scalv_t* scalv_cntl;
extern packm_t* packm_cntl_noscale;
extern packm_t* packm_cntl;
extern packv_t* packv_cntl;
extern unpackv_t* unpackv_cntl;
@@ -95,7 +95,7 @@ void bli_hemv_cntl_init()
BLIS_VARIANT2,
hemv_mc,
scalv_cntl, // scale y up-front
packm_cntl_noscale, // pack A11 (if needed)
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
packv_cntl, // pack y1 (if needed)
gemv_cntl_rp_bs_dot, // gemv_n_rp needed by var2
@@ -110,7 +110,7 @@ void bli_hemv_cntl_init()
BLIS_VARIANT2,
hemv_mc,
scalv_cntl, // scale y up-front
packm_cntl_noscale, // pack A11 (if needed)
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
packv_cntl, // pack y1 (if needed)
gemv_cntl_rp_bs_axpy, // gemv_n_rp needed by var2

View File

@@ -34,7 +34,7 @@
#include "blis.h"
extern packm_t* packm_cntl_noscale;
extern packm_t* packm_cntl;
extern packv_t* packv_cntl;
extern unpackm_t* unpackm_cntl;
@@ -93,21 +93,21 @@ void bli_her_cntl_init()
bli_her_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
her_mc,
packv_cntl, // pack x1 (if needed)
NULL, // do NOT pack C11
packv_cntl, // pack x1 (if needed)
NULL, // do NOT pack C11
ger_cntl_rp_bs_row,
her_cntl_bs_ke_lrow_ucol,
NULL ); // no unpacking needed
NULL ); // no unpacking needed
her_cntl_ge_lcol_urow
=
bli_her_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
her_mc,
packv_cntl, // pack x1 (if needed)
NULL, // do NOT pack C11
packv_cntl, // pack x1 (if needed)
NULL, // do NOT pack C11
ger_cntl_cp_bs_col,
her_cntl_bs_ke_lcol_urow,
NULL ); // no unpacking needed
NULL ); // no unpacking needed
}
void bli_her_cntl_finalize()

View File

@@ -34,7 +34,7 @@
#include "blis.h"
extern packm_t* packm_cntl_noscale;
extern packm_t* packm_cntl;
extern packv_t* packv_cntl;
extern unpackm_t* unpackm_cntl;
@@ -93,25 +93,25 @@ void bli_her2_cntl_init()
bli_her2_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
her2_mc,
packv_cntl, // pack x1 (if needed)
packv_cntl, // pack y1 (if needed)
packm_cntl_noscale, // pack C11 (if needed)
packv_cntl, // pack x1 (if needed)
packv_cntl, // pack y1 (if needed)
packm_cntl, // pack C11 (if needed)
ger_cntl_rp_bs_row,
ger_cntl_rp_bs_row,
her2_cntl_bs_ke_lrow_ucol,
unpackm_cntl ); // unpack C11 (if packed)
unpackm_cntl ); // unpack C11 (if packed)
her2_cntl_ge_lcol_urow
=
bli_her2_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT4,
her2_mc,
packv_cntl, // pack x1 (if needed)
packv_cntl, // pack y1 (if needed)
packm_cntl_noscale, // pack C11 (if needed)
packv_cntl, // pack x1 (if needed)
packv_cntl, // pack y1 (if needed)
packm_cntl, // pack C11 (if needed)
ger_cntl_cp_bs_col,
ger_cntl_cp_bs_col,
her2_cntl_bs_ke_lcol_urow,
unpackm_cntl ); // unpack C11 (if packed)
unpackm_cntl ); // unpack C11 (if packed)
}
void bli_her2_cntl_finalize()

View File

@@ -34,7 +34,7 @@
#include "blis.h"
extern packm_t* packm_cntl_noscale;
extern packm_t* packm_cntl;
extern packv_t* packv_cntl;
extern unpackv_t* unpackv_cntl;
@@ -92,25 +92,25 @@ void bli_trmv_cntl_init()
trmv_cntl_ge_nrow_tcol
=
bli_trmv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1, // use var1 to maximize x1 usage
BLIS_VARIANT1, // use var1 to maximize x1 usage
trmv_mc,
packm_cntl_noscale, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
gemv_cntl_rp_bs_dot, // gemv_rp needed by var1
NULL, // gemv_cp not needed by var1
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
gemv_cntl_rp_bs_dot, // gemv_rp needed by var1
NULL, // gemv_cp not needed by var1
trmv_cntl_bs_ke_nrow_tcol,
unpackv_cntl ); // unpack x1 (if packed)
unpackv_cntl ); // unpack x1 (if packed)
trmv_cntl_ge_ncol_trow
=
bli_trmv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1, // use var1 to maximize x1 usage
BLIS_VARIANT1, // use var1 to maximize x1 usage
trmv_mc,
packm_cntl_noscale, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1
NULL, // gemv_cp not needed by var1
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1
NULL, // gemv_cp not needed by var1
trmv_cntl_bs_ke_ncol_trow,
unpackv_cntl ); // unpack x1 (if packed)
unpackv_cntl ); // unpack x1 (if packed)
}
void bli_trmv_cntl_finalize()

View File

@@ -35,7 +35,7 @@
#include "blis.h"
extern scalv_t* scalv_cntl;
extern packm_t* packm_cntl_noscale;
extern packm_t* packm_cntl;
extern packv_t* packv_cntl;
extern unpackv_t* unpackv_cntl;
@@ -89,27 +89,27 @@ void bli_trsv_cntl_init()
trsv_cntl_ge_nrow_tcol
=
bli_trsv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1, // use var1 to maximize x1 usage
BLIS_VARIANT1, // use var1 to maximize x1 usage
trsv_mc,
scalv_cntl, // scale x up-front
packm_cntl_noscale, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
gemv_cntl_rp_bs_dot, // gemv_rp needed by var1
NULL, // gemv_cp not needed by var1
scalv_cntl, // scale x up-front
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
gemv_cntl_rp_bs_dot, // gemv_rp needed by var1
NULL, // gemv_cp not needed by var1
trsv_cntl_bs_ke_nrow_tcol,
unpackv_cntl ); // unpack x1 (if needed)
unpackv_cntl ); // unpack x1 (if needed)
trsv_cntl_ge_ncol_trow
=
bli_trsv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1, // use var1 to maximize x1 usage
BLIS_VARIANT1, // use var1 to maximize x1 usage
trsv_mc,
scalv_cntl, // scale x up-front
packm_cntl_noscale, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1
NULL, // gemv_cp not needed by var1
scalv_cntl, // scale x up-front
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1
NULL, // gemv_cp not needed by var1
trsv_cntl_bs_ke_ncol_trow,
unpackv_cntl ); // unpack x1 (if needed)
unpackv_cntl ); // unpack x1 (if needed)
}
void bli_trsv_cntl_finalize()

View File

@@ -109,7 +109,6 @@ void bli_gemm_cntl_init()
BLIS_VARIANT2,
gemm_mr,
gemm_kr,
FALSE, // do NOT scale by alpha
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -123,7 +122,6 @@ void bli_gemm_cntl_init()
BLIS_VARIANT2,
gemm_kr,
gemm_nr,
FALSE, // do NOT scale by alpha
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -138,7 +136,6 @@ void bli_gemm_cntl_init()
BLIS_VARIANT1,
gemm_mr,
gemm_nr,
FALSE, // do NOT scale by beta
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?

View File

@@ -103,7 +103,6 @@ void bli_hemm_cntl_init()
BLIS_VARIANT2,
hemm_mr,
hemm_kr,
FALSE, // do NOT scale by alpha
TRUE, // densify
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -117,9 +116,7 @@ void bli_hemm_cntl_init()
BLIS_VARIANT2,
hemm_kr,
hemm_nr,
FALSE, // do NOT scale by alpha
//FALSE, // already dense; densify not necessary
TRUE, // densify (if needed)
TRUE, // densify
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
@@ -133,7 +130,6 @@ void bli_hemm_cntl_init()
BLIS_VARIANT1,
hemm_mr,
hemm_nr,
FALSE, // do NOT scale by beta
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?

View File

@@ -104,7 +104,6 @@ void bli_her2k_cntl_init()
BLIS_VARIANT2,
her2k_mr,
her2k_kr,
FALSE, // do NOT scale by alpha
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -118,7 +117,6 @@ void bli_her2k_cntl_init()
BLIS_VARIANT2,
her2k_kr,
her2k_nr,
FALSE, // do NOT scale by alpha
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -133,7 +131,6 @@ void bli_her2k_cntl_init()
BLIS_VARIANT1,
her2k_mr,
her2k_nr,
FALSE, // do NOT scale by beta
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?

View File

@@ -103,7 +103,6 @@ void bli_herk_cntl_init()
BLIS_VARIANT2,
herk_mr,
herk_kr,
FALSE, // do NOT scale by alpha
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -117,7 +116,6 @@ void bli_herk_cntl_init()
BLIS_VARIANT2,
herk_kr,
herk_nr,
FALSE, // do NOT scale by alpha
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -132,7 +130,6 @@ void bli_herk_cntl_init()
BLIS_VARIANT1,
herk_mr,
herk_nr,
FALSE, // do NOT scale by beta
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?

View File

@@ -116,7 +116,6 @@ void bli_trmm_cntl_init()
// multiple is set to mr.
trmm_mr,
trmm_mr,
FALSE, // do NOT scale by alpha
TRUE, // densify
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -132,7 +131,6 @@ void bli_trmm_cntl_init()
// since "k" dim multiple is set to mr above.
trmm_mr,
trmm_nr,
FALSE, // do NOT scale by alpha
FALSE, // already dense
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -149,7 +147,6 @@ void bli_trmm_cntl_init()
// multiple is set to nr.
trmm_mr,
trmm_nr,
FALSE, // do NOT scale by alpha
FALSE, // already dense
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -165,7 +162,6 @@ void bli_trmm_cntl_init()
// since "k" dim multiple is set to nr above.
trmm_nr,
trmm_nr,
FALSE, // do NOT scale by alpha
TRUE, // densify
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -180,7 +176,6 @@ void bli_trmm_cntl_init()
BLIS_VARIANT1,
trmm_mr,
trmm_nr,
FALSE, // do NOT scale by beta
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?

View File

@@ -34,7 +34,8 @@
#include "blis.h"
extern trmm_t* trmm3_cntl;
extern trmm_t* trmm_l_cntl;
extern trmm_t* trmm_r_cntl;
//
// Define object-based interface.
@@ -133,7 +134,8 @@ void bli_trmm3( side_t side,
&beta_local );
// Choose the control tree.
cntl = trmm3_cntl;
if ( bli_is_left( side ) ) cntl = trmm_l_cntl;
else cntl = trmm_r_cntl;
// Invoke the internal back-end.
bli_trmm_int( &alpha_local,

View File

@@ -32,7 +32,6 @@
*/
#include "bli_trmm3_cntl.h"
#include "bli_trmm3_check.h"

View File

@@ -1,236 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2013, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
extern scalm_t* scalm_cntl;
extern gemm_t* gemm_cntl_bp_ke;
trmm_t* trmm3_cntl;
trmm_t* trmm3_cntl_bp_ke;
trmm_t* trmm3_cntl_op_bp;
trmm_t* trmm3_cntl_mm_op;
trmm_t* trmm3_cntl_vl_mm;
packm_t* trmm3_packa_cntl;
packm_t* trmm3_packb_cntl;
packm_t* trmm3_packc_cntl;
unpackm_t* trmm3_unpackc_cntl;
blksz_t* trmm3_mc;
blksz_t* trmm3_nc;
blksz_t* trmm3_kc;
blksz_t* trmm3_mr;
blksz_t* trmm3_nr;
blksz_t* trmm3_kr;
blksz_t* trmm3_ni;
// Initialize the file-scope trmm3 control tree state: create the blocksize
// objects, the packm/unpackm control nodes, and a chain of trmm control tree
// nodes (bp_ke -> op_bp -> mm_op -> vl_mm), then point trmm3_cntl at the
// outermost node. Takes no arguments and returns nothing; everything is
// stored in the globals defined above.
void bli_trmm3_cntl_init()
{
// Create blocksize objects for each dimension. Each call passes a
// (default, extension) pair for each of the S, D, C, and Z macro suffixes.
trmm3_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
trmm3_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
trmm3_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
trmm3_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
trmm3_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
// NOTE(review): trmm3_kr is created here (and freed in the finalize
// function) but is not passed to any node created in this function.
trmm3_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
// Unlike the blocksizes above, ni passes 0 in each slot where the others
// pass a BLIS_EXTEND_* value.
trmm3_ni = bli_blksz_obj_create( BLIS_DEFAULT_NI_S, 0,
BLIS_DEFAULT_NI_D, 0,
BLIS_DEFAULT_NI_C, 0,
BLIS_DEFAULT_NI_Z, 0 );
// Create control tree objects for packm operations on a, b, and c.
trmm3_packa_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3, // pack panels of A compactly
// IMPORTANT: for consistency with trsm, "k" dim
// multiple is set to mr.
trmm3_mr,
trmm3_mr,
FALSE, // do NOT scale by alpha
TRUE, // densify
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS,
BLIS_BUFFER_FOR_A_BLOCK );
trmm3_packb_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
// IMPORTANT: m dim multiple here must be mr
// since "k" dim multiple is set to mr above.
trmm3_mr,
trmm3_nr,
FALSE, // do NOT scale by alpha
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS,
BLIS_BUFFER_FOR_B_PANEL );
// NOTE(review): trmm3_packc_cntl and trmm3_unpackc_cntl are created (and
// later freed) but neither is passed to any of the trmm nodes built below.
trmm3_packc_cntl
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
trmm3_mr,
trmm3_nr,
FALSE, // do NOT scale by beta
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COLUMNS,
BLIS_BUFFER_FOR_GEN_USE );
trmm3_unpackc_cntl
=
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
NULL ); // no blocksize needed
// Create control tree object for lowest-level block-panel kernel.
// Every argument after the implementation type and variant is NULL.
trmm3_cntl_bp_ke
=
bli_trmm_cntl_obj_create( BLIS_UNB_OPT,
BLIS_VARIANT2,
NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL );
// Create control tree object for outer panel (to block-panel)
// problem, packing a and b. This node references the packa/packb nodes
// created above, the trmm3 block-panel kernel node, and the externally
// defined gemm_cntl_bp_ke.
trmm3_cntl_op_bp
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
//BLIS_VARIANT4, // var1 with incremental pack in iter 0
BLIS_VARIANT1,
trmm3_mc,
trmm3_ni,
NULL,
trmm3_packa_cntl,
trmm3_packb_cntl,
NULL,
trmm3_cntl_bp_ke,
gemm_cntl_bp_ke,
NULL );
// Create control tree object for general problem via multiple
// rank-k (outer panel) updates, packing a and b. Uses the kc blocksize
// and references trmm3_cntl_op_bp as its only sub-node.
trmm3_cntl_mm_op
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
trmm3_kc,
NULL,
NULL,
NULL,
NULL,
NULL,
trmm3_cntl_op_bp,
NULL,
NULL );
// Create control tree object for very large problem via multiple
// general problems, packing a and b. Uses the nc blocksize and
// references trmm3_cntl_mm_op as its only sub-node.
trmm3_cntl_vl_mm
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
trmm3_nc,
NULL,
NULL,
NULL,
NULL,
NULL,
trmm3_cntl_mm_op,
NULL,
NULL );
// Alias the "master" trmm3 control tree to a shorter name. The outermost
// (vl_mm) node is the one selected; the mm_op alternative is left
// commented out.
//trmm3_cntl = trmm3_cntl_mm_op;
trmm3_cntl = trmm3_cntl_vl_mm;
}
// Release the objects created by bli_trmm3_cntl_init(): the seven blocksize
// objects, the four pack/unpack control nodes, and the four trmm control
// tree nodes. trmm3_cntl is only an alias of trmm3_cntl_vl_mm and is not
// freed separately; the externally defined gemm_cntl_bp_ke is not freed here.
void bli_trmm3_cntl_finalize()
{
// Blocksize objects.
bli_blksz_obj_free( trmm3_mc );
bli_blksz_obj_free( trmm3_nc );
bli_blksz_obj_free( trmm3_kc );
bli_blksz_obj_free( trmm3_mr );
bli_blksz_obj_free( trmm3_nr );
bli_blksz_obj_free( trmm3_kr );
bli_blksz_obj_free( trmm3_ni );
// Pack/unpack control nodes.
bli_cntl_obj_free( trmm3_packa_cntl );
bli_cntl_obj_free( trmm3_packb_cntl );
bli_cntl_obj_free( trmm3_packc_cntl );
bli_cntl_obj_free( trmm3_unpackc_cntl );
// trmm control tree nodes, innermost to outermost.
bli_cntl_obj_free( trmm3_cntl_bp_ke );
bli_cntl_obj_free( trmm3_cntl_op_bp );
bli_cntl_obj_free( trmm3_cntl_mm_op );
bli_cntl_obj_free( trmm3_cntl_vl_mm );
}

View File

@@ -1,37 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2013, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Creates the trmm3 blocksize objects and control tree nodes.
void bli_trmm3_cntl_init( void );
// Frees the objects created by bli_trmm3_cntl_init().
void bli_trmm3_cntl_finalize( void );

View File

@@ -116,7 +116,6 @@ void bli_trsm_cntl_init()
// support right and bottom-right edge cases
trsm_mr,
trsm_mr,
FALSE, // do NOT scale by alpha
TRUE, // densify
TRUE, // invert diagonal
TRUE, // reverse iteration if upper?
@@ -132,7 +131,6 @@ void bli_trsm_cntl_init()
// B_pack is updated (ie: serves as C) in trsm
trsm_mr,
trsm_nr,
FALSE, // do NOT scale by alpha
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -147,7 +145,6 @@ void bli_trsm_cntl_init()
BLIS_VARIANT2,
trsm_nr,
trsm_mr,
FALSE, // do NOT scale by alpha
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -161,7 +158,6 @@ void bli_trsm_cntl_init()
BLIS_VARIANT3, // pack panels of B compactly
trsm_mr,
trsm_mr,
FALSE, // do NOT scale by alpha
TRUE, // densify
TRUE, // invert diagonal
FALSE, // reverse iteration if upper?
@@ -176,7 +172,6 @@ void bli_trsm_cntl_init()
BLIS_VARIANT1,
trsm_mr,
trsm_nr,
FALSE, // do NOT scale by beta
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?

View File

@@ -61,7 +61,6 @@ void bli_cntl_init( void )
bli_herk_cntl_init();
bli_her2k_cntl_init();
bli_trmm_cntl_init();
bli_trmm3_cntl_init();
bli_trsm_cntl_init();
}
@@ -92,7 +91,6 @@ void bli_cntl_finalize( void )
bli_herk_cntl_finalize();
bli_her2k_cntl_finalize();
bli_trmm_cntl_finalize();
bli_trmm3_cntl_finalize();
bli_trsm_cntl_finalize();
}

View File

@@ -799,11 +799,19 @@ bli_obj_width_stored( obj )
// -- Miscellaneous object macros --
// Make an alias (shallow copy)
// Make a special alias (shallow copy) of a into b that does not overwrite
// b's pack_mem entry. Only the fields handled by
// bli_obj_init_basic_shallow_copy_of() are copied from a to b; everything
// else in b is left as it was.
#define bli_obj_alias_for_packing( a, b ) \
{ \
bli_obj_init_basic_shallow_copy_of( a, b ); \
}
// Make a full alias (shallow copy) of a into b, including pack_mem and
// friends (padded dimensions, panel stride and panel dimension).
// The full shallow copy macro performs the basic field copy itself, so a
// separate basic-copy invocation beforehand would only repeat the same
// assignments.
#define bli_obj_alias_to( a, b ) \
{ \
	bli_obj_init_full_shallow_copy_of( a, b ); \
}
// Check if two objects are aliases of one another
@@ -844,10 +852,8 @@ bli_obj_width_stored( obj )
// Reset the packing state of the object pointed to by obj_p: fetch its
// pack_mem entry via bli_obj_pack_mem() and hand it to bli_mem_set_buffer()
// together with a NULL buffer.
#define bli_obj_init_pack( obj_p ) \
{ \
	/* The argument is parenthesized so that pointer expressions such as */ \
	/* ( p + 1 ) are dereferenced as a whole.                            */ \
	mem_t* pack_mem = bli_obj_pack_mem( *(obj_p) ); \
\
	bli_mem_set_buffer( NULL, pack_mem ); \
}
@@ -868,12 +874,6 @@ bli_obj_width_stored( obj )
mem_t* pack_mem = bli_obj_pack_mem( *(obj_p) ); \
if ( bli_mem_is_alloc( pack_mem ) ) \
bli_mem_release( pack_mem ); \
\
/*
mem_t* cast_mem = bli_obj_cast_mem( *(obj_p) ); \
if ( bli_mem_is_alloc( cast_mem ) ) \
bli_mem_release( cast_mem ); \
*/ \
}

View File

@@ -480,15 +480,12 @@ typedef struct obj_s
inc_t ps; // panel stride (distance to next panel)
inc_t pd; // panel dimension (the "width" of a panel:
// usually MR or NR)
//mem_t cast_mem; // cached memory region for casting
} obj_t;
// Define these macros here since they must be updated if contents of
// obj_t changes.
#define bli_obj_init_as_copy_of( a, b ) \
#define bli_obj_init_basic_shallow_copy_of( a, b ) \
{ \
(b).root = (a).root; \
\
@@ -504,12 +501,28 @@ typedef struct obj_s
(b).buffer = (a).buffer; \
(b).rs = (a).rs; \
(b).cs = (a).cs; \
\
(b).scalar = (a).scalar; \
\
/* We must NOT copy pack_mem field since this macro forms the basis of
bli_obj_alias_for_packing(), which is used in packm_init(). There, we
want to copy the basic fields of the obj_t but PRESERVE the pack_mem field
of the destination object since it holds the "cached" mem_t object
and buffer. The other fields, such as padded dimensions, are always
set by bli_packm_init(), so we don't need to copy them either. */ \
}
// Full alias (shallow copy) of a into b: the basic fields are copied first,
// followed by the packing-related fields, so that all fields of the obj_t
// struct end up copied.
#define bli_obj_init_full_shallow_copy_of( a, b ) \
{ \
	bli_obj_init_basic_shallow_copy_of( a, b ); \
\
	/* Panel dimension and panel stride. */ \
	(b).pd       = (a).pd; \
	(b).ps       = (a).ps; \
	/* Padded dimensions. */ \
	(b).n_padded = (a).n_padded; \
	(b).m_padded = (a).m_padded; \
	/* Cached packing memory entry. */ \
	(b).pack_mem = (a).pack_mem; \
}
#define bli_obj_init_subpart_from( a, b ) \
@@ -518,8 +531,8 @@ typedef struct obj_s
\
(b).offm = (a).offm; \
(b).offn = (a).offn; \
\
\
/* Avoid copying m since it will be overwritten. */ \
/* Avoid copying n since it will be overwritten. */ \
(b).diag_off = (a).diag_off; \
\
(b).info = (a).info; \
@@ -528,18 +541,19 @@ typedef struct obj_s
(b).buffer = (a).buffer; \
(b).rs = (a).rs; \
(b).cs = (a).cs; \
\
(b).scalar = (a).scalar; \
\
/* We want to copy the pack_mem field here because this macro is used
when creating subpartitions, including those of packed objects. In
those situations, we want the subpartition to inherit the pack_mem
field of its parent, as well as other related fields such as the
padded dimensions. */ \
(b).pack_mem = (a).pack_mem; \
(b).m_padded = (a).m_padded; \
(b).n_padded = (a).n_padded; \
(b).pd = (a).pd; \
(b).ps = (a).ps; \
\
/*(b).cast_mem = (a).cast_mem;*/ \
}