mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Removed optional scaling from packm control tree.
Details: - Removed does_scale field from packm control tree node and bli_packm_cntl_obj_create() interface. Adjusted all invocations of _cntl_obj_create() accordingly. - Redefined/renamed macros that are used in aliasing so that now, bli_obj_alias_to() does a full alias (shallow copy) while bli_obj_alias_for_packing() does a partial alias that preserves the pack_mem-related fields of the aliasing (destination) object. - Removed bli_trmm3_cntl.c, .h after realizing that the trmm control tree will work just fine for bli_trmm3(). - Removed some commented vestiges of the typecasting functionality needed to support heterogeneous datatypes.
This commit is contained in:
@@ -34,18 +34,13 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
packm_t* packm_cntl_row_noscale;
|
||||
packm_t* packm_cntl_row_scale;
|
||||
packm_t* packm_cntl_col_noscale;
|
||||
packm_t* packm_cntl_col_scale;
|
||||
packm_t* packm_cntl_row;
|
||||
packm_t* packm_cntl_col;
|
||||
|
||||
packm_t* packm_cntl_rpn_noscale;
|
||||
packm_t* packm_cntl_rpn_scale;
|
||||
packm_t* packm_cntl_cpn_noscale;
|
||||
packm_t* packm_cntl_cpn_scale;
|
||||
packm_t* packm_cntl_rpn;
|
||||
packm_t* packm_cntl_cpn;
|
||||
|
||||
packm_t* packm_cntl_noscale;
|
||||
packm_t* packm_cntl_scale;
|
||||
packm_t* packm_cntl;
|
||||
|
||||
blksz_t* packm_mult_ldim;
|
||||
blksz_t* packm_mult_nvec;
|
||||
@@ -87,27 +82,13 @@ void bli_packm_cntl_init()
|
||||
// with structure, though they can also be used on matrices that
|
||||
// are already dense and/or have no structure.
|
||||
|
||||
// Create control trees to pack by rows (with and without scaling).
|
||||
packm_cntl_row_noscale
|
||||
// Create control trees to pack by rows.
|
||||
packm_cntl_row
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1, // When packing to rows:
|
||||
packm_mult_nvec, // - nvec multiple is used for m dimension
|
||||
packm_mult_ldim, // - ldim multiple is used for n dimension
|
||||
FALSE, // do NOT scale
|
||||
FALSE, // do NOT densify structure
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // do NOT iterate backwards if upper
|
||||
FALSE, // do NOT iterate backwards if lower
|
||||
BLIS_PACKED_ROWS,
|
||||
BLIS_BUFFER_FOR_GEN_USE );
|
||||
packm_cntl_row_scale
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1, // When packing to rows:
|
||||
packm_mult_nvec, // - nvec multiple is used for m dimension
|
||||
packm_mult_ldim, // - ldim multiple is used for n dimension
|
||||
TRUE, // do scale
|
||||
FALSE, // do NOT densify structure
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // do NOT iterate backwards if upper
|
||||
@@ -116,27 +97,13 @@ void bli_packm_cntl_init()
|
||||
BLIS_BUFFER_FOR_GEN_USE );
|
||||
|
||||
|
||||
// Create control trees to pack by columns (with and without scaling).
|
||||
packm_cntl_col_noscale
|
||||
// Create control trees to pack by columns.
|
||||
packm_cntl_col
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1, // When packing to columns:
|
||||
packm_mult_ldim, // - ldim multiple is used for m dimension
|
||||
packm_mult_nvec, // - nvec multiple is used for n dimension
|
||||
FALSE, // do NOT scale
|
||||
FALSE, // do NOT densify structure
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // do NOT iterate backwards if upper
|
||||
FALSE, // do NOT iterate backwards if lower
|
||||
BLIS_PACKED_COLUMNS,
|
||||
BLIS_BUFFER_FOR_GEN_USE );
|
||||
packm_cntl_col_scale
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1, // When packing to columns:
|
||||
packm_mult_ldim, // - ldim multiple is used for m dimension
|
||||
packm_mult_nvec, // - nvec multiple is used for n dimension
|
||||
TRUE, // do scale
|
||||
FALSE, // do NOT densify structure
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // do NOT iterate backwards if upper
|
||||
@@ -147,16 +114,13 @@ void bli_packm_cntl_init()
|
||||
|
||||
// Set defaults when we don't care whether the packing is by rows or
|
||||
// by columns.
|
||||
packm_cntl_noscale = packm_cntl_col_noscale;
|
||||
packm_cntl_scale = packm_cntl_col_scale;
|
||||
packm_cntl = packm_cntl_col;
|
||||
}
|
||||
|
||||
void bli_packm_cntl_finalize()
|
||||
{
|
||||
bli_cntl_obj_free( packm_cntl_row_noscale );
|
||||
bli_cntl_obj_free( packm_cntl_row_scale );
|
||||
bli_cntl_obj_free( packm_cntl_col_noscale );
|
||||
bli_cntl_obj_free( packm_cntl_col_scale );
|
||||
bli_cntl_obj_free( packm_cntl_row );
|
||||
bli_cntl_obj_free( packm_cntl_col );
|
||||
|
||||
bli_blksz_obj_free( packm_mult_ldim );
|
||||
bli_blksz_obj_free( packm_mult_nvec );
|
||||
@@ -166,7 +130,6 @@ packm_t* bli_packm_cntl_obj_create( impl_t impl_type,
|
||||
varnum_t var_num,
|
||||
blksz_t* mr,
|
||||
blksz_t* nr,
|
||||
bool_t does_scale,
|
||||
bool_t does_densify,
|
||||
bool_t does_invert_diag,
|
||||
bool_t rev_iter_if_upper,
|
||||
@@ -182,7 +145,6 @@ packm_t* bli_packm_cntl_obj_create( impl_t impl_type,
|
||||
cntl->var_num = var_num;
|
||||
cntl->mr = mr;
|
||||
cntl->nr = nr;
|
||||
cntl->does_scale = does_scale;
|
||||
cntl->does_densify = does_densify;
|
||||
cntl->does_invert_diag = does_invert_diag;
|
||||
cntl->rev_iter_if_upper = rev_iter_if_upper;
|
||||
@@ -198,7 +160,6 @@ void bli_packm_cntl_obj_init( packm_t* cntl,
|
||||
varnum_t var_num,
|
||||
blksz_t* mr,
|
||||
blksz_t* nr,
|
||||
bool_t does_scale,
|
||||
bool_t does_densify,
|
||||
bool_t does_invert_diag,
|
||||
bool_t rev_iter_if_upper,
|
||||
@@ -210,7 +171,6 @@ void bli_packm_cntl_obj_init( packm_t* cntl,
|
||||
cntl->var_num = var_num;
|
||||
cntl->mr = mr;
|
||||
cntl->nr = nr;
|
||||
cntl->does_scale = does_scale;
|
||||
cntl->does_densify = does_densify;
|
||||
cntl->does_invert_diag = does_invert_diag;
|
||||
cntl->rev_iter_if_upper = rev_iter_if_upper;
|
||||
|
||||
@@ -38,7 +38,6 @@ struct packm_s
|
||||
varnum_t var_num;
|
||||
blksz_t* mr;
|
||||
blksz_t* nr;
|
||||
bool_t does_scale;
|
||||
bool_t does_densify;
|
||||
bool_t does_invert_diag;
|
||||
bool_t rev_iter_if_upper;
|
||||
@@ -51,7 +50,6 @@ typedef struct packm_s packm_t;
|
||||
#define cntl_mr( cntl ) cntl->mr
|
||||
#define cntl_nr( cntl ) cntl->nr
|
||||
|
||||
#define cntl_does_scale( cntl ) cntl->does_scale
|
||||
#define cntl_does_densify( cntl ) cntl->does_densify
|
||||
#define cntl_does_invert_diag( cntl ) cntl->does_invert_diag
|
||||
#define cntl_rev_iter_if_upper( cntl ) cntl->rev_iter_if_upper
|
||||
@@ -73,7 +71,6 @@ packm_t* bli_packm_cntl_obj_create( impl_t impl_type,
|
||||
varnum_t var_num,
|
||||
blksz_t* mr_def,
|
||||
blksz_t* nr_def,
|
||||
bool_t does_scale,
|
||||
bool_t does_densify,
|
||||
bool_t does_invert_diag,
|
||||
bool_t rev_iter_if_upper,
|
||||
@@ -85,7 +82,6 @@ void bli_packm_cntl_obj_init( packm_t* cntl,
|
||||
varnum_t var_num,
|
||||
blksz_t* mr_def,
|
||||
blksz_t* nr_def,
|
||||
bool_t does_scale,
|
||||
bool_t does_densify,
|
||||
bool_t does_invert_diag,
|
||||
bool_t rev_iter_if_upper,
|
||||
|
||||
@@ -185,8 +185,10 @@ void bli_packm_init_pack( bool_t densify,
|
||||
void* buf;
|
||||
|
||||
|
||||
// We begin by copying the basic fields of c.
|
||||
bli_obj_alias_to( *c, *p );
|
||||
// We begin by copying the basic fields of c. We do NOT copy the
|
||||
// pack_mem entry from c because the entry in p may be cached from
|
||||
// a previous iteration, and thus we don't want to overwrite it.
|
||||
bli_obj_alias_for_packing( *c, *p );
|
||||
|
||||
// Update the dimension fields to explicitly reflect a transposition,
|
||||
// if needed.
|
||||
|
||||
@@ -110,8 +110,9 @@ void bli_packm_int( obj_t* beta,
|
||||
// to be non-unit even when no scaling is prescribed. If the control tree
|
||||
// indicates no scaling, then make sure that BLIS_ONE is passed into the
|
||||
// packm implementation.
|
||||
if ( cntl_does_scale( cntl ) ) beta_use = beta;
|
||||
else beta_use = &BLIS_ONE;
|
||||
//if ( cntl_does_scale( cntl ) ) beta_use = beta;
|
||||
//else beta_use = &BLIS_ONE;
|
||||
beta_use = &BLIS_ONE;
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
n = cntl_var_num( cntl );
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
#include "blis.h"
|
||||
|
||||
extern scalv_t* scalv_cntl;
|
||||
extern packm_t* packm_cntl_noscale;
|
||||
extern packm_t* packm_cntl;
|
||||
extern packv_t* packv_cntl;
|
||||
extern unpackv_t* unpackv_cntl;
|
||||
|
||||
@@ -107,23 +107,23 @@ void bli_gemv_cntl_init()
|
||||
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemv_nc,
|
||||
scalv_cntl, // scale y up-front
|
||||
packm_cntl_noscale, // pack A1 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // y is not partitioned in var2
|
||||
scalv_cntl, // scale y up-front
|
||||
packm_cntl, // pack A1 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // y is not partitioned in var2
|
||||
gemv_cntl_bs_ke_dot,
|
||||
NULL ); // y is not partitioned in var2
|
||||
NULL ); // y is not partitioned in var2
|
||||
gemv_cntl_rp_bs_axpy
|
||||
=
|
||||
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemv_nc,
|
||||
scalv_cntl, // scale y up-front
|
||||
packm_cntl_noscale, // pack A1 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // y is not partitioned in var2
|
||||
scalv_cntl, // scale y up-front
|
||||
packm_cntl, // pack A1 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // y is not partitioned in var2
|
||||
gemv_cntl_bs_ke_axpy,
|
||||
NULL ); // y is not partitioned in var2
|
||||
NULL ); // y is not partitioned in var2
|
||||
|
||||
|
||||
// Create control trees for problems with relatively small n dimension
|
||||
@@ -133,23 +133,23 @@ void bli_gemv_cntl_init()
|
||||
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
gemv_mc,
|
||||
NULL, // no scaling in blk_var1
|
||||
packm_cntl_noscale, // pack A1 (if needed)
|
||||
NULL, // x is not partitioned in var1
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
NULL, // no scaling in blk_var1
|
||||
packm_cntl, // pack A1 (if needed)
|
||||
NULL, // x is not partitioned in var1
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
gemv_cntl_bs_ke_dot,
|
||||
unpackv_cntl ); // unpack y1 (if packed)
|
||||
unpackv_cntl ); // unpack y1 (if packed)
|
||||
gemv_cntl_cp_bs_axpy
|
||||
=
|
||||
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
gemv_mc,
|
||||
NULL, // no scaling in blk_var1
|
||||
packm_cntl_noscale, // pack A1 (if needed)
|
||||
NULL, // x is not partitioned in var1
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
NULL, // no scaling in blk_var1
|
||||
packm_cntl, // pack A1 (if needed)
|
||||
NULL, // x is not partitioned in var1
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
gemv_cntl_bs_ke_axpy,
|
||||
unpackv_cntl ); // unpack y1 (if packed)
|
||||
unpackv_cntl ); // unpack y1 (if packed)
|
||||
|
||||
|
||||
// Create control trees for generally large problems. Here, we choose a
|
||||
@@ -159,23 +159,23 @@ void bli_gemv_cntl_init()
|
||||
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
gemv_mc,
|
||||
NULL, // no scaling in blk_var1
|
||||
NULL, // do not pack A1
|
||||
NULL, // x is not partitioned in var1
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
NULL, // no scaling in blk_var1
|
||||
NULL, // do not pack A1
|
||||
NULL, // x is not partitioned in var1
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
gemv_cntl_rp_bs_dot,
|
||||
unpackv_cntl ); // unpack y1 (if packed)
|
||||
unpackv_cntl ); // unpack y1 (if packed)
|
||||
gemv_cntl_ge_axpy
|
||||
=
|
||||
bli_gemv_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
gemv_mc,
|
||||
NULL, // no scaling in blk_var1
|
||||
NULL, // do not pack A1
|
||||
NULL, // x is not partitioned in var1
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
NULL, // no scaling in blk_var1
|
||||
NULL, // do not pack A1
|
||||
NULL, // x is not partitioned in var1
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
gemv_cntl_rp_bs_axpy,
|
||||
unpackv_cntl ); // unpack y1 (if packed)
|
||||
unpackv_cntl ); // unpack y1 (if packed)
|
||||
}
|
||||
|
||||
void bli_gemv_cntl_finalize()
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern packm_t* packm_cntl_noscale;
|
||||
extern packm_t* packm_cntl;
|
||||
extern packv_t* packv_cntl;
|
||||
extern unpackm_t* unpackm_cntl;
|
||||
|
||||
@@ -104,21 +104,21 @@ void bli_ger_cntl_init()
|
||||
bli_ger_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
ger_nc,
|
||||
NULL, // x is not partitioned in var2
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
packm_cntl_noscale, // pack A1 (if needed)
|
||||
NULL, // x is not partitioned in var2
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
packm_cntl, // pack A1 (if needed)
|
||||
ger_cntl_bs_ke_row,
|
||||
unpackm_cntl ); // unpack A1 (if packed)
|
||||
unpackm_cntl ); // unpack A1 (if packed)
|
||||
ger_cntl_rp_bs_col
|
||||
=
|
||||
bli_ger_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
ger_nc,
|
||||
NULL, // x is not partitioned in var2
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
packm_cntl_noscale, // pack A1 (if needed)
|
||||
NULL, // x is not partitioned in var2
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
packm_cntl, // pack A1 (if needed)
|
||||
ger_cntl_bs_ke_col,
|
||||
unpackm_cntl ); // unpack A1 (if packed)
|
||||
unpackm_cntl ); // unpack A1 (if packed)
|
||||
|
||||
|
||||
// Create control trees for problems with relatively small n dimension
|
||||
@@ -128,21 +128,21 @@ void bli_ger_cntl_init()
|
||||
bli_ger_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
ger_mc,
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // y is not partitioned in var1
|
||||
packm_cntl_noscale, // pack A1 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // y is not partitioned in var1
|
||||
packm_cntl, // pack A1 (if needed)
|
||||
ger_cntl_bs_ke_row,
|
||||
unpackm_cntl ); // unpack A1 (if packed)
|
||||
unpackm_cntl ); // unpack A1 (if packed)
|
||||
ger_cntl_cp_bs_col
|
||||
=
|
||||
bli_ger_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
ger_mc,
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // y is not partitioned in var1
|
||||
packm_cntl_noscale, // pack A1 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // y is not partitioned in var1
|
||||
packm_cntl, // pack A1 (if needed)
|
||||
ger_cntl_bs_ke_col,
|
||||
unpackm_cntl ); // unpack A1 (if packed)
|
||||
unpackm_cntl ); // unpack A1 (if packed)
|
||||
|
||||
|
||||
// Create control trees for generally large problems. Here, we choose a
|
||||
@@ -152,21 +152,21 @@ void bli_ger_cntl_init()
|
||||
bli_ger_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
ger_nc,
|
||||
NULL, // x is not partitioned in var2
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
NULL, // do not pack A1
|
||||
NULL, // x is not partitioned in var2
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
NULL, // do not pack A1
|
||||
ger_cntl_cp_bs_row,
|
||||
NULL ); // do not unpack A1
|
||||
NULL ); // do not unpack A1
|
||||
ger_cntl_ge_col
|
||||
=
|
||||
bli_ger_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
ger_nc,
|
||||
NULL, // x is not partitioned in var2
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
NULL, // do not pack A1
|
||||
NULL, // x is not partitioned in var2
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
NULL, // do not pack A1
|
||||
ger_cntl_cp_bs_col,
|
||||
NULL ); // do not unpack A1
|
||||
NULL ); // do not unpack A1
|
||||
}
|
||||
|
||||
void bli_ger_cntl_finalize()
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
#include "blis.h"
|
||||
|
||||
extern scalv_t* scalv_cntl;
|
||||
extern packm_t* packm_cntl_noscale;
|
||||
extern packm_t* packm_cntl;
|
||||
extern packv_t* packv_cntl;
|
||||
extern unpackv_t* unpackv_cntl;
|
||||
|
||||
@@ -95,7 +95,7 @@ void bli_hemv_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
hemv_mc,
|
||||
scalv_cntl, // scale y up-front
|
||||
packm_cntl_noscale, // pack A11 (if needed)
|
||||
packm_cntl, // pack A11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
gemv_cntl_rp_bs_dot, // gemv_n_rp needed by var2
|
||||
@@ -110,7 +110,7 @@ void bli_hemv_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
hemv_mc,
|
||||
scalv_cntl, // scale y up-front
|
||||
packm_cntl_noscale, // pack A11 (if needed)
|
||||
packm_cntl, // pack A11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
gemv_cntl_rp_bs_axpy, // gemv_n_rp needed by var2
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern packm_t* packm_cntl_noscale;
|
||||
extern packm_t* packm_cntl;
|
||||
extern packv_t* packv_cntl;
|
||||
extern unpackm_t* unpackm_cntl;
|
||||
|
||||
@@ -93,21 +93,21 @@ void bli_her_cntl_init()
|
||||
bli_her_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
her_mc,
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // do NOT pack C11
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // do NOT pack C11
|
||||
ger_cntl_rp_bs_row,
|
||||
her_cntl_bs_ke_lrow_ucol,
|
||||
NULL ); // no unpacking needed
|
||||
NULL ); // no unpacking needed
|
||||
her_cntl_ge_lcol_urow
|
||||
=
|
||||
bli_her_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
her_mc,
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // do NOT pack C11
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
NULL, // do NOT pack C11
|
||||
ger_cntl_cp_bs_col,
|
||||
her_cntl_bs_ke_lcol_urow,
|
||||
NULL ); // no unpacking needed
|
||||
NULL ); // no unpacking needed
|
||||
}
|
||||
|
||||
void bli_her_cntl_finalize()
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern packm_t* packm_cntl_noscale;
|
||||
extern packm_t* packm_cntl;
|
||||
extern packv_t* packv_cntl;
|
||||
extern unpackm_t* unpackm_cntl;
|
||||
|
||||
@@ -93,25 +93,25 @@ void bli_her2_cntl_init()
|
||||
bli_her2_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
her2_mc,
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
packm_cntl_noscale, // pack C11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
packm_cntl, // pack C11 (if needed)
|
||||
ger_cntl_rp_bs_row,
|
||||
ger_cntl_rp_bs_row,
|
||||
her2_cntl_bs_ke_lrow_ucol,
|
||||
unpackm_cntl ); // unpack C11 (if packed)
|
||||
unpackm_cntl ); // unpack C11 (if packed)
|
||||
her2_cntl_ge_lcol_urow
|
||||
=
|
||||
bli_her2_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT4,
|
||||
her2_mc,
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
packm_cntl_noscale, // pack C11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
packv_cntl, // pack y1 (if needed)
|
||||
packm_cntl, // pack C11 (if needed)
|
||||
ger_cntl_cp_bs_col,
|
||||
ger_cntl_cp_bs_col,
|
||||
her2_cntl_bs_ke_lcol_urow,
|
||||
unpackm_cntl ); // unpack C11 (if packed)
|
||||
unpackm_cntl ); // unpack C11 (if packed)
|
||||
}
|
||||
|
||||
void bli_her2_cntl_finalize()
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern packm_t* packm_cntl_noscale;
|
||||
extern packm_t* packm_cntl;
|
||||
extern packv_t* packv_cntl;
|
||||
extern unpackv_t* unpackv_cntl;
|
||||
|
||||
@@ -92,25 +92,25 @@ void bli_trmv_cntl_init()
|
||||
trmv_cntl_ge_nrow_tcol
|
||||
=
|
||||
bli_trmv_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1, // use var1 to maximize x1 usage
|
||||
BLIS_VARIANT1, // use var1 to maximize x1 usage
|
||||
trmv_mc,
|
||||
packm_cntl_noscale, // pack A11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
gemv_cntl_rp_bs_dot, // gemv_rp needed by var1
|
||||
NULL, // gemv_cp not needed by var1
|
||||
packm_cntl, // pack A11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
gemv_cntl_rp_bs_dot, // gemv_rp needed by var1
|
||||
NULL, // gemv_cp not needed by var1
|
||||
trmv_cntl_bs_ke_nrow_tcol,
|
||||
unpackv_cntl ); // unpack x1 (if packed)
|
||||
unpackv_cntl ); // unpack x1 (if packed)
|
||||
trmv_cntl_ge_ncol_trow
|
||||
=
|
||||
bli_trmv_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1, // use var1 to maximize x1 usage
|
||||
BLIS_VARIANT1, // use var1 to maximize x1 usage
|
||||
trmv_mc,
|
||||
packm_cntl_noscale, // pack A11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1
|
||||
NULL, // gemv_cp not needed by var1
|
||||
packm_cntl, // pack A11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1
|
||||
NULL, // gemv_cp not needed by var1
|
||||
trmv_cntl_bs_ke_ncol_trow,
|
||||
unpackv_cntl ); // unpack x1 (if packed)
|
||||
unpackv_cntl ); // unpack x1 (if packed)
|
||||
}
|
||||
|
||||
void bli_trmv_cntl_finalize()
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
#include "blis.h"
|
||||
|
||||
extern scalv_t* scalv_cntl;
|
||||
extern packm_t* packm_cntl_noscale;
|
||||
extern packm_t* packm_cntl;
|
||||
extern packv_t* packv_cntl;
|
||||
extern unpackv_t* unpackv_cntl;
|
||||
|
||||
@@ -89,27 +89,27 @@ void bli_trsv_cntl_init()
|
||||
trsv_cntl_ge_nrow_tcol
|
||||
=
|
||||
bli_trsv_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1, // use var1 to maximize x1 usage
|
||||
BLIS_VARIANT1, // use var1 to maximize x1 usage
|
||||
trsv_mc,
|
||||
scalv_cntl, // scale x up-front
|
||||
packm_cntl_noscale, // pack A11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
gemv_cntl_rp_bs_dot, // gemv_rp needed by var1
|
||||
NULL, // gemv_cp not needed by var1
|
||||
scalv_cntl, // scale x up-front
|
||||
packm_cntl, // pack A11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
gemv_cntl_rp_bs_dot, // gemv_rp needed by var1
|
||||
NULL, // gemv_cp not needed by var1
|
||||
trsv_cntl_bs_ke_nrow_tcol,
|
||||
unpackv_cntl ); // unpack x1 (if needed)
|
||||
unpackv_cntl ); // unpack x1 (if needed)
|
||||
trsv_cntl_ge_ncol_trow
|
||||
=
|
||||
bli_trsv_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1, // use var1 to maximize x1 usage
|
||||
BLIS_VARIANT1, // use var1 to maximize x1 usage
|
||||
trsv_mc,
|
||||
scalv_cntl, // scale x up-front
|
||||
packm_cntl_noscale, // pack A11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1
|
||||
NULL, // gemv_cp not needed by var1
|
||||
scalv_cntl, // scale x up-front
|
||||
packm_cntl, // pack A11 (if needed)
|
||||
packv_cntl, // pack x1 (if needed)
|
||||
gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1
|
||||
NULL, // gemv_cp not needed by var1
|
||||
trsv_cntl_bs_ke_ncol_trow,
|
||||
unpackv_cntl ); // unpack x1 (if needed)
|
||||
unpackv_cntl ); // unpack x1 (if needed)
|
||||
}
|
||||
|
||||
void bli_trsv_cntl_finalize()
|
||||
|
||||
@@ -109,7 +109,6 @@ void bli_gemm_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
gemm_mr,
|
||||
gemm_kr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -123,7 +122,6 @@ void bli_gemm_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
gemm_kr,
|
||||
gemm_nr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -138,7 +136,6 @@ void bli_gemm_cntl_init()
|
||||
BLIS_VARIANT1,
|
||||
gemm_mr,
|
||||
gemm_nr,
|
||||
FALSE, // do NOT scale by beta
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
|
||||
@@ -103,7 +103,6 @@ void bli_hemm_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
hemm_mr,
|
||||
hemm_kr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
TRUE, // densify
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -117,9 +116,7 @@ void bli_hemm_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
hemm_kr,
|
||||
hemm_nr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
//FALSE, // already dense; densify not necessary
|
||||
TRUE, // densify (if needed)
|
||||
TRUE, // densify
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
@@ -133,7 +130,6 @@ void bli_hemm_cntl_init()
|
||||
BLIS_VARIANT1,
|
||||
hemm_mr,
|
||||
hemm_nr,
|
||||
FALSE, // do NOT scale by beta
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
|
||||
@@ -104,7 +104,6 @@ void bli_her2k_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
her2k_mr,
|
||||
her2k_kr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -118,7 +117,6 @@ void bli_her2k_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
her2k_kr,
|
||||
her2k_nr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -133,7 +131,6 @@ void bli_her2k_cntl_init()
|
||||
BLIS_VARIANT1,
|
||||
her2k_mr,
|
||||
her2k_nr,
|
||||
FALSE, // do NOT scale by beta
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
|
||||
@@ -103,7 +103,6 @@ void bli_herk_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
herk_mr,
|
||||
herk_kr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -117,7 +116,6 @@ void bli_herk_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
herk_kr,
|
||||
herk_nr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -132,7 +130,6 @@ void bli_herk_cntl_init()
|
||||
BLIS_VARIANT1,
|
||||
herk_mr,
|
||||
herk_nr,
|
||||
FALSE, // do NOT scale by beta
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
|
||||
@@ -116,7 +116,6 @@ void bli_trmm_cntl_init()
|
||||
// multiple is set to mr.
|
||||
trmm_mr,
|
||||
trmm_mr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
TRUE, // densify
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -132,7 +131,6 @@ void bli_trmm_cntl_init()
|
||||
// since "k" dim multiple is set to mr above.
|
||||
trmm_mr,
|
||||
trmm_nr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
FALSE, // already dense
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -149,7 +147,6 @@ void bli_trmm_cntl_init()
|
||||
// multiple is set to nr.
|
||||
trmm_mr,
|
||||
trmm_nr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
FALSE, // already dense
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -165,7 +162,6 @@ void bli_trmm_cntl_init()
|
||||
// since "k" dim multiple is set to nr above.
|
||||
trmm_nr,
|
||||
trmm_nr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
TRUE, // densify
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -180,7 +176,6 @@ void bli_trmm_cntl_init()
|
||||
BLIS_VARIANT1,
|
||||
trmm_mr,
|
||||
trmm_nr,
|
||||
FALSE, // do NOT scale by beta
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
|
||||
@@ -34,7 +34,8 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern trmm_t* trmm3_cntl;
|
||||
extern trmm_t* trmm_l_cntl;
|
||||
extern trmm_t* trmm_r_cntl;
|
||||
|
||||
//
|
||||
// Define object-based interface.
|
||||
@@ -133,7 +134,8 @@ void bli_trmm3( side_t side,
|
||||
&beta_local );
|
||||
|
||||
// Choose the control tree.
|
||||
cntl = trmm3_cntl;
|
||||
if ( bli_is_left( side ) ) cntl = trmm_l_cntl;
|
||||
else cntl = trmm_r_cntl;
|
||||
|
||||
// Invoke the internal back-end.
|
||||
bli_trmm_int( &alpha_local,
|
||||
|
||||
@@ -32,7 +32,6 @@
|
||||
|
||||
*/
|
||||
|
||||
#include "bli_trmm3_cntl.h"
|
||||
#include "bli_trmm3_check.h"
|
||||
|
||||
|
||||
|
||||
@@ -1,236 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2013, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern scalm_t* scalm_cntl;
|
||||
extern gemm_t* gemm_cntl_bp_ke;
|
||||
|
||||
trmm_t* trmm3_cntl;
|
||||
|
||||
trmm_t* trmm3_cntl_bp_ke;
|
||||
trmm_t* trmm3_cntl_op_bp;
|
||||
trmm_t* trmm3_cntl_mm_op;
|
||||
trmm_t* trmm3_cntl_vl_mm;
|
||||
|
||||
packm_t* trmm3_packa_cntl;
|
||||
packm_t* trmm3_packb_cntl;
|
||||
packm_t* trmm3_packc_cntl;
|
||||
unpackm_t* trmm3_unpackc_cntl;
|
||||
|
||||
blksz_t* trmm3_mc;
|
||||
blksz_t* trmm3_nc;
|
||||
blksz_t* trmm3_kc;
|
||||
blksz_t* trmm3_mr;
|
||||
blksz_t* trmm3_nr;
|
||||
blksz_t* trmm3_kr;
|
||||
blksz_t* trmm3_ni;
|
||||
|
||||
|
||||
void bli_trmm3_cntl_init()
|
||||
{
|
||||
// Create blocksize objects for each dimension.
|
||||
trmm3_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
|
||||
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
|
||||
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
|
||||
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
|
||||
|
||||
trmm3_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
|
||||
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
|
||||
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
|
||||
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
|
||||
|
||||
trmm3_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
|
||||
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
|
||||
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
|
||||
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
|
||||
|
||||
trmm3_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
|
||||
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
|
||||
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
|
||||
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
|
||||
|
||||
trmm3_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
|
||||
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
|
||||
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
|
||||
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
|
||||
|
||||
trmm3_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
|
||||
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
|
||||
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
|
||||
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
|
||||
|
||||
trmm3_ni = bli_blksz_obj_create( BLIS_DEFAULT_NI_S, 0,
|
||||
BLIS_DEFAULT_NI_D, 0,
|
||||
BLIS_DEFAULT_NI_C, 0,
|
||||
BLIS_DEFAULT_NI_Z, 0 );
|
||||
|
||||
|
||||
// Create control tree objects for packm operations on a, b, and c.
|
||||
trmm3_packa_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3, // pack panels of A compactly
|
||||
// IMPORTANT: for consistency with trsm, "k" dim
|
||||
// multiple is set to mr.
|
||||
trmm3_mr,
|
||||
trmm3_mr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
TRUE, // densify
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
trmm3_packb_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: m dim multiple here must be mr
|
||||
// since "k" dim multiple is set to mr above.
|
||||
trmm3_mr,
|
||||
trmm3_nr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
trmm3_packc_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
trmm3_mr,
|
||||
trmm3_nr,
|
||||
FALSE, // do NOT scale by beta
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COLUMNS,
|
||||
BLIS_BUFFER_FOR_GEN_USE );
|
||||
|
||||
trmm3_unpackc_cntl
|
||||
=
|
||||
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
NULL ); // no blocksize needed
|
||||
|
||||
|
||||
// Create control tree object for lowest-level block-panel kernel.
|
||||
trmm3_cntl_bp_ke
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_UNB_OPT,
|
||||
BLIS_VARIANT2,
|
||||
NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL );
|
||||
|
||||
// Create control tree object for outer panel (to block-panel)
|
||||
// problem, packing a and b.
|
||||
trmm3_cntl_op_bp
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
//BLIS_VARIANT4, // var1 with incremental pack in iter 0
|
||||
BLIS_VARIANT1,
|
||||
trmm3_mc,
|
||||
trmm3_ni,
|
||||
NULL,
|
||||
trmm3_packa_cntl,
|
||||
trmm3_packb_cntl,
|
||||
NULL,
|
||||
trmm3_cntl_bp_ke,
|
||||
gemm_cntl_bp_ke,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for general problem via multiple
|
||||
// rank-k (outer panel) updates, packing a and b.
|
||||
trmm3_cntl_mm_op
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
trmm3_kc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm3_cntl_op_bp,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for very large problem via multiple
|
||||
// general problems, packing a and b.
|
||||
trmm3_cntl_vl_mm
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
trmm3_nc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm3_cntl_mm_op,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
// Alias the "master" trmm3 control tree to a shorter name.
|
||||
//trmm3_cntl = trmm3_cntl_mm_op;
|
||||
trmm3_cntl = trmm3_cntl_vl_mm;
|
||||
}
|
||||
|
||||
void bli_trmm3_cntl_finalize()
|
||||
{
|
||||
bli_blksz_obj_free( trmm3_mc );
|
||||
bli_blksz_obj_free( trmm3_nc );
|
||||
bli_blksz_obj_free( trmm3_kc );
|
||||
bli_blksz_obj_free( trmm3_mr );
|
||||
bli_blksz_obj_free( trmm3_nr );
|
||||
bli_blksz_obj_free( trmm3_kr );
|
||||
bli_blksz_obj_free( trmm3_ni );
|
||||
|
||||
bli_cntl_obj_free( trmm3_packa_cntl );
|
||||
bli_cntl_obj_free( trmm3_packb_cntl );
|
||||
bli_cntl_obj_free( trmm3_packc_cntl );
|
||||
bli_cntl_obj_free( trmm3_unpackc_cntl );
|
||||
|
||||
bli_cntl_obj_free( trmm3_cntl_bp_ke );
|
||||
bli_cntl_obj_free( trmm3_cntl_op_bp );
|
||||
bli_cntl_obj_free( trmm3_cntl_mm_op );
|
||||
bli_cntl_obj_free( trmm3_cntl_vl_mm );
|
||||
}
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2013, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
void bli_trmm3_cntl_init( void );
|
||||
void bli_trmm3_cntl_finalize( void );
|
||||
|
||||
@@ -116,7 +116,6 @@ void bli_trsm_cntl_init()
|
||||
// support right and bottom-right edge cases
|
||||
trsm_mr,
|
||||
trsm_mr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
TRUE, // densify
|
||||
TRUE, // invert diagonal
|
||||
TRUE, // reverse iteration if upper?
|
||||
@@ -132,7 +131,6 @@ void bli_trsm_cntl_init()
|
||||
// B_pack is updated (ie: serves as C) in trsm
|
||||
trsm_mr,
|
||||
trsm_nr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -147,7 +145,6 @@ void bli_trsm_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
trsm_nr,
|
||||
trsm_mr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -161,7 +158,6 @@ void bli_trsm_cntl_init()
|
||||
BLIS_VARIANT3, // pack panels of B compactly
|
||||
trsm_mr,
|
||||
trsm_mr,
|
||||
FALSE, // do NOT scale by alpha
|
||||
TRUE, // densify
|
||||
TRUE, // invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -176,7 +172,6 @@ void bli_trsm_cntl_init()
|
||||
BLIS_VARIANT1,
|
||||
trsm_mr,
|
||||
trsm_nr,
|
||||
FALSE, // do NOT scale by beta
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
|
||||
@@ -61,7 +61,6 @@ void bli_cntl_init( void )
|
||||
bli_herk_cntl_init();
|
||||
bli_her2k_cntl_init();
|
||||
bli_trmm_cntl_init();
|
||||
bli_trmm3_cntl_init();
|
||||
bli_trsm_cntl_init();
|
||||
}
|
||||
|
||||
@@ -92,7 +91,6 @@ void bli_cntl_finalize( void )
|
||||
bli_herk_cntl_finalize();
|
||||
bli_her2k_cntl_finalize();
|
||||
bli_trmm_cntl_finalize();
|
||||
bli_trmm3_cntl_finalize();
|
||||
bli_trsm_cntl_finalize();
|
||||
}
|
||||
|
||||
|
||||
@@ -799,11 +799,19 @@ bli_obj_width_stored( obj )
|
||||
|
||||
// -- Miscellaneous object macros --
|
||||
|
||||
// Make an alias (shallow copy)
|
||||
// Make a special alias (shallow copy) that does not overwrite pack_mem
|
||||
// entry.
|
||||
|
||||
#define bli_obj_alias_for_packing( a, b ) \
|
||||
{ \
|
||||
bli_obj_init_basic_shallow_copy_of( a, b ); \
|
||||
}
|
||||
|
||||
// Make a full alias (shallow copy), including pack_mem and friends
|
||||
|
||||
#define bli_obj_alias_to( a, b ) \
|
||||
{ \
|
||||
bli_obj_init_as_copy_of( a, b ); \
|
||||
bli_obj_init_full_shallow_copy_of( a, b ); \
|
||||
}
|
||||
|
||||
// Check if two objects are aliases of one another
|
||||
@@ -844,10 +852,8 @@ bli_obj_width_stored( obj )
|
||||
#define bli_obj_init_pack( obj_p ) \
|
||||
{ \
|
||||
mem_t* pack_mem = bli_obj_pack_mem( *obj_p ); \
|
||||
/*mem_t* cast_mem = bli_obj_cast_mem( *obj_p );*/ \
|
||||
\
|
||||
bli_mem_set_buffer( NULL, pack_mem ); \
|
||||
/*bli_mem_set_buffer( NULL, cast_mem );*/ \
|
||||
}
|
||||
|
||||
|
||||
@@ -868,12 +874,6 @@ bli_obj_width_stored( obj )
|
||||
mem_t* pack_mem = bli_obj_pack_mem( *(obj_p) ); \
|
||||
if ( bli_mem_is_alloc( pack_mem ) ) \
|
||||
bli_mem_release( pack_mem ); \
|
||||
\
|
||||
/*
|
||||
mem_t* cast_mem = bli_obj_cast_mem( *(obj_p) ); \
|
||||
if ( bli_mem_is_alloc( cast_mem ) ) \
|
||||
bli_mem_release( cast_mem ); \
|
||||
*/ \
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -480,15 +480,12 @@ typedef struct obj_s
|
||||
inc_t ps; // panel stride (distance to next panel)
|
||||
inc_t pd; // panel dimension (the "width" of a panel:
|
||||
// usually MR or NR)
|
||||
|
||||
//mem_t cast_mem; // cached memory region for casting
|
||||
|
||||
} obj_t;
|
||||
|
||||
|
||||
// Define these macros here since they must be updated if contents of
|
||||
// obj_t changes.
|
||||
#define bli_obj_init_as_copy_of( a, b ) \
|
||||
#define bli_obj_init_basic_shallow_copy_of( a, b ) \
|
||||
{ \
|
||||
(b).root = (a).root; \
|
||||
\
|
||||
@@ -504,12 +501,28 @@ typedef struct obj_s
|
||||
(b).buffer = (a).buffer; \
|
||||
(b).rs = (a).rs; \
|
||||
(b).cs = (a).cs; \
|
||||
\
|
||||
(b).scalar = (a).scalar; \
|
||||
\
|
||||
/* We must NOT copy pack_mem field since this macro forms the basis of
|
||||
bli_obj_alias_for_packing(), which is used in packm_init(). There, we want to
|
||||
copy the basic fields of the obj_t but PRESERVE the pack_mem field
|
||||
(and the corresponding dimensions and stride) of the destination
|
||||
object since it holds the cached mem_t object and buffer. */ \
|
||||
of the destination object since it holds the "cached" mem_t object
|
||||
and buffer. The other fields, such as padded dimensions, are always
|
||||
set by bli_packm_init(), so we don't need to copy them either. */ \
|
||||
}
|
||||
|
||||
#define bli_obj_init_full_shallow_copy_of( a, b ) \
|
||||
{ \
|
||||
/* This macro implements a full alias (shallow copy) that copies all
|
||||
fields of the obj_t struct. */ \
|
||||
bli_obj_init_basic_shallow_copy_of( a, b ); \
|
||||
\
|
||||
(b).pack_mem = (a).pack_mem; \
|
||||
(b).m_padded = (a).m_padded; \
|
||||
(b).n_padded = (a).n_padded; \
|
||||
(b).ps = (a).ps; \
|
||||
(b).pd = (a).pd; \
|
||||
}
|
||||
|
||||
#define bli_obj_init_subpart_from( a, b ) \
|
||||
@@ -518,8 +531,8 @@ typedef struct obj_s
|
||||
\
|
||||
(b).offm = (a).offm; \
|
||||
(b).offn = (a).offn; \
|
||||
\
|
||||
\
|
||||
/* Avoid copying m since it will be overwritten. */ \
|
||||
/* Avoid copying n since it will be overwritten. */ \
|
||||
(b).diag_off = (a).diag_off; \
|
||||
\
|
||||
(b).info = (a).info; \
|
||||
@@ -528,18 +541,19 @@ typedef struct obj_s
|
||||
(b).buffer = (a).buffer; \
|
||||
(b).rs = (a).rs; \
|
||||
(b).cs = (a).cs; \
|
||||
\
|
||||
(b).scalar = (a).scalar; \
|
||||
\
|
||||
/* We want to copy the pack_mem field here because this macro is used
|
||||
when creating subpartitions, including those of packed objects. In
|
||||
those situations, we want the subpartition to inherit the pack_mem
|
||||
field, and the corresponding packed dimensions, of its parent. */ \
|
||||
field of its parent, as well as other related fields such as the
|
||||
padded dimensions. */ \
|
||||
(b).pack_mem = (a).pack_mem; \
|
||||
(b).m_padded = (a).m_padded; \
|
||||
(b).n_padded = (a).n_padded; \
|
||||
(b).pd = (a).pd; \
|
||||
(b).ps = (a).ps; \
|
||||
\
|
||||
/*(b).cast_mem = (a).cast_mem;*/ \
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user