Removed cntl tree usage from packm implementation.

Details:
- Added new fields to obj_t info field:
  - invert_diag
  - pack_order_if_upper
  - pack_order_if_lower
  These fields allow packm_init() to copy information that originates
  in the control tree into the object itself, so that the packm
  implementation does not need to consult control trees at all. This is
  being done to aid Bryan's DxT code generation.
- Added macros that operate on above fields.
- Changed packm_init(), packm_blk_var2(), and packm_blk_var3() according
  to above changes.
- Made similar (but much simpler) changes to packv.
- Deprecated packm_blk_var1(), packm_unb_var1(), and packm_densify().
  These were part of prototype implementations and are no longer needed.
This commit is contained in:
Field G. Van Zee
2013-02-12 18:39:35 -06:00
parent eb139ae256
commit cf49e35f98
23 changed files with 454 additions and 354 deletions

View File

@@ -48,8 +48,10 @@ void bl2_packv_init( obj_t* a,
// 3. cast only: Not yet supported / not used.
// 4. no-op: The control tree sometimes directs us to skip the
// pack operation entirely. Alias p to a and return.
obj_t c;
num_t datatype;
pack_t pack_schema;
dim_t mult_dim;
obj_t c;
// Check parameters.
if ( bl2_error_checking_is_enabled() )
@@ -101,15 +103,97 @@ void bl2_packv_init( obj_t* a,
bl2_obj_alias_to( *a, c );
}
// Extract various fields from the control tree and pass them in
// explicitly into _init_pack(). This allows external code generators
// the option of bypassing usage of control trees altogether.
datatype = bl2_obj_datatype( *a );
pack_schema = cntl_pack_schema( cntl );
mult_dim = bl2_blksz_for_type( datatype, cntl_mult_dim( cntl ) );
// Initialize object p for the final packed vector.
bl2_packv_init_pack( &c,
p,
cntl );
bl2_packv_init_pack( pack_schema,
mult_dim,
&c,
p );
// Now p is ready to be packed.
}
// Initialize object p so that it describes the packed copy of vector c.
//
// Parameters:
//   pack_schema  pack schema to record in p; also selects whether strides
//                are set below (only BLIS_PACKED_VECTOR is handled here).
//   mult_dim     forwarded to bl2_obj_set_buffer_with_cached_packv_mem()
//                as its dimension-multiple argument. NOTE(review):
//                presumably the multiple to which the vector length is
//                rounded when sizing the pack buffer -- confirm against
//                the macro body.
//   c            intermediate object to be packed (read only here).
//   p            object to initialize; overwritten with an alias of c and
//                then adjusted as described below.
//
// No control tree is consulted: the caller (bl2_packv_init()) extracts
// pack_schema and mult_dim and passes them in explicitly.
void bl2_packv_init_pack( pack_t pack_schema,
dim_t mult_dim,
obj_t* c,
obj_t* p )
{
// In this function, we initialize an object p to represent the packed
// copy of the intermediate object c. At this point, the datatype of
// object c should be equal to the target datatype of the original
// object, either because:
// (1) c is set up to contain the typecast of the original object, or
// (2) c is aliased to the original object, which would only happen
// when the original object's datatype and target datatype are
// equal.
// So here, we want to create an object p that is identical to c, except
// that:
// (1) object p is marked as being stored in a standard, contiguous
// format (ie: a column vector),
// (2) the view offset of p is reset to (0,0),
// (3) object p contains a pack schema field that reflects its desired
// packing (ie: a contiguous vector), and
// (4) object p's main buffer is set to a new memory region acquired
// from the memory manager, or extracted from p if a mem entry is
// already available. (After acquiring a mem entry from the memory
// manager, it is cached within p for quick access later on.)
// We also explicitly set the dimensions and strides of p in case c
// is just an alias to the original vector. (This is done in case that
// original vector is, say, 1xm because we like to think of our packed
// vectors as always column vectors.)
dim_t dim_c = bl2_obj_vector_dim( *c );
inc_t rs_p, cs_p;
// We begin by copying the basic fields of c.
bl2_obj_alias_to( *c, *p );
// Update the dimensions so that p is always dim_c x 1.
bl2_obj_set_dims( dim_c, 1, *p );
// Reset the view offsets to (0,0).
bl2_obj_set_offs( 0, 0, *p );
// Set the pack schema in the p object to the value passed in by the
// caller (bl2_packv_init() reads it from the control tree node).
bl2_obj_set_pack_schema( pack_schema, *p );
// Check the mem_t entry of p associated with the pack buffer. If it is
// NULL, then acquire memory sufficient to hold the object data and cache
// it to p. (Otherwise, if it is non-NULL, then memory has already been
// acquired from the memory manager and cached.) We then set the main
// buffer of p to the cached address of the pack memory.
bl2_obj_set_buffer_with_cached_packv_mem( *p, *p, mult_dim );
// Set the row and column strides of p based on the pack schema. For any
// schema other than BLIS_PACKED_VECTOR the strides copied from c by the
// alias above are left as they are.
if ( pack_schema == BLIS_PACKED_VECTOR )
{
mem_t* mem;
// Access the mem_t entry cached in p.
mem = bl2_obj_pack_mem( *p );
// Set the strides to reflect a column-stored vector. Note that the
// column stride may never be used, and is only useful to determine
// how much space beyond the vector would need to be zero-padded, if
// zero-padding was needed.
rs_p = 1;
cs_p = bl2_mem_length( mem );
bl2_obj_set_incs( rs_p, cs_p, *p );
}
}
/*
void bl2_packv_init_cast( obj_t* a,
obj_t* p,
obj_t* c )
@@ -154,76 +238,5 @@ void bl2_packv_init_cast( obj_t* a,
// Note that the column stride should never be used.
bl2_obj_set_incs( 1, dim_a, *c );
}
void bl2_packv_init_pack( obj_t* c,
obj_t* p,
packv_t* cntl )
{
// In this function, we initialize an object p to represent the packed
// copy of the intermediate object c. At this point, the datatype of
// object c should be equal to the target datatype of the original
// object, either because:
// (1) c is set up to contain the typecast of the original object, or
// (2) c is aliased to the original object, which would only happen
// when the original object's datatype and target datatype are
// equal.
// So here, we want to create an object p that is identical to c, except
// that:
// (1) object p is marked as being stored in a standard, contiguous
// format (ie: a column vector),
// (2) the view offset of p is reset to (0,0),
// (3) object p contains a pack schema field that reflects its desired
// packing (ie: a contiguous vector), and
// (4) object p's main buffer is set to a new memory region acquired
// from the memory manager, or extracted from p if a mem entry is
// already available. (After acquring a mem entry from the memory
// manager, it is cached within p for quick access later on.)
// We also explicitly set the dimensions and strides of p in case c
// is just an alias to the original vector. (This is done in case that
// original vector is, say, 1xm because we like to think of our packed
// vectors as always column vectors.)
dim_t dim_c = bl2_obj_vector_dim( *c );
pack_t pack_schema = cntl_pack_schema( cntl );
inc_t rs_p, cs_p;
// We begin by copying the basic fields of c.
bl2_obj_alias_to( *c, *p );
// Update the dimensions.
bl2_obj_set_dims( dim_c, 1, *p );
// Reset the view offsets to (0,0).
bl2_obj_set_offs( 0, 0, *p );
// Set the pack schema in the p object to the value in the control tree
// node.
bl2_obj_set_pack_schema( pack_schema, *p );
// Check the mem_t entry of p associated with the pack buffer. If it is
// NULL, then acquire memory sufficient to hold the object data and cache
// it to p. (Otherwise, if it is non-NULL, then memory has already been
// acquired from the memory manager and cached.) We then set the main
// buffer of p to the cached address of the pack memory.
bl2_obj_set_buffer_with_cached_packv_mem( *p, *p, cntl );
// Set the row and column strides of p based on the pack schema.
if ( pack_schema == BLIS_PACKED_VECTOR )
{
mem_t* mem;
// Access the mem_t entry cached in p.
mem = bl2_obj_pack_mem( *p );
// Set the strides to reflect a column-stored vector. Note that the
// column stride may never be used, and is only useful to determine
// how much space beyond the vector would need to be zero-padded, if
// zero-padding was needed.
rs_p = 1;
cs_p = bl2_mem_length( mem );
bl2_obj_set_incs( rs_p, cs_p, *p );
}
}
*/

View File

@@ -36,11 +36,13 @@ void bl2_packv_init( obj_t* a,
obj_t* p,
packv_t* cntl );
void bl2_packv_init_pack( pack_t pack_schema,
dim_t mult_dim,
obj_t* c,
obj_t* p );
/*
void bl2_packv_init_cast( obj_t* a,
obj_t* p,
obj_t* c );
void bl2_packv_init_pack( obj_t* c,
obj_t* p,
packv_t* cntl );
*/

View File

@@ -147,6 +147,7 @@ void bl2_unpackv_int( obj_t* p,
}
}
/*
void bl2_unpackv_init_cast( obj_t* p,
obj_t* a,
obj_t* c )
@@ -191,4 +192,4 @@ void bl2_unpackv_init_cast( obj_t* p,
// buffer of c to the cached address of the cast memory.
bl2_obj_set_buffer_with_cached_cast_mem( *p, *c );
}
*/

View File

@@ -36,6 +36,8 @@ void bl2_unpackv_int( obj_t* p,
obj_t* a,
unpackv_t* cntl );
/*
void bl2_unpackv_init_cast( obj_t* p,
obj_t* a,
obj_t* c );
*/

View File

@@ -38,10 +38,6 @@
#include "bl2_packm_int.h"
#include "bl2_packm_part.h"
#include "bl2_packm_densify.h"
#include "bl2_packm_unb_var1.h"
#include "bl2_packm_blk_var1.h"
#include "bl2_packm_blk_var2.h"

View File

@@ -42,7 +42,6 @@ typedef void (*FUNCPTR_T)(
diag_t diagc,
uplo_t uploc,
trans_t transc,
bool_t densify,
dim_t m,
dim_t n,
dim_t m_max,
@@ -57,8 +56,7 @@ static FUNCPTR_T GENARRAY(ftypes,packm_blk_var2);
void bl2_packm_blk_var2( obj_t* beta,
obj_t* c,
obj_t* p,
packm_t* cntl )
obj_t* p )
{
num_t dt_cp = bl2_obj_datatype( *c );
mem_t* mem_p = bl2_obj_pack_mem( *p );
@@ -68,7 +66,6 @@ void bl2_packm_blk_var2( obj_t* beta,
diag_t diagc = bl2_obj_diag( *c );
uplo_t uploc = bl2_obj_uplo( *c );
trans_t transc = bl2_obj_conjtrans_status( *c );
bool_t densify = cntl_does_densify( cntl );
dim_t m_p = bl2_obj_length( *p );
dim_t n_p = bl2_obj_width( *p );
@@ -98,7 +95,6 @@ void bl2_packm_blk_var2( obj_t* beta,
diagc,
uploc,
transc,
densify,
m_p,
n_p,
m_max_p,
@@ -118,7 +114,6 @@ void PASTEMAC(ch,varname )( \
diag_t diagc, \
uplo_t uploc, \
trans_t transc, \
bool_t densify, \
dim_t m, \
dim_t n, \
dim_t m_max, \
@@ -248,7 +243,7 @@ void PASTEMAC(ch,varname )( \
/* If the current panel intersects the diagonal and C is either
upper- or lower-stored, then we assume C is symmetric or
Hermitian and that it must be densified (note we don't even
bother checking the densify parameter), in which case we pack
bother passing in a densify parameter), in which case we pack
the panel in three stages.
Otherwise, we pack the panel all at once. */ \
if ( bl2_intersects_diag_n( diagoffc_i, *m_panel, *n_panel ) && \

View File

@@ -34,8 +34,7 @@
void bl2_packm_blk_var2( obj_t* beta,
obj_t* c,
obj_t* p,
packm_t* cntl );
obj_t* p );
#undef GENTPROT
@@ -47,7 +46,6 @@ void PASTEMAC(ch,varname)( \
diag_t diagc, \
uplo_t uploc, \
trans_t transc, \
bool_t densify, \
dim_t m, \
dim_t n, \
dim_t m_max, \

View File

@@ -42,7 +42,6 @@ typedef void (*FUNCPTR_T)(
diag_t diagc,
uplo_t uploc,
trans_t transc,
bool_t densify,
bool_t invdiag,
bool_t revifup,
bool_t reviflo,
@@ -60,8 +59,7 @@ static FUNCPTR_T GENARRAY(ftypes,packm_blk_var3);
void bl2_packm_blk_var3( obj_t* beta,
obj_t* c,
obj_t* p,
packm_t* cntl )
obj_t* p )
{
num_t dt_cp = bl2_obj_datatype( *c );
mem_t* mem_p = bl2_obj_pack_mem( *p );
@@ -71,10 +69,9 @@ void bl2_packm_blk_var3( obj_t* beta,
diag_t diagc = bl2_obj_diag( *c );
uplo_t uploc = bl2_obj_uplo( *c );
trans_t transc = bl2_obj_conjtrans_status( *c );
bool_t densify = cntl_does_densify( cntl );
bool_t invdiag = cntl_does_invert_diag( cntl );
bool_t revifup = cntl_rev_iter_if_upper( cntl );
bool_t reviflo = cntl_rev_iter_if_lower( cntl );
bool_t invdiag = bl2_obj_has_inverted_diag( *p );
bool_t revifup = bl2_obj_is_pack_rev_if_upper( *p );
bool_t reviflo = bl2_obj_is_pack_rev_if_lower( *p );
dim_t m_p = bl2_obj_length( *p );
dim_t n_p = bl2_obj_width( *p );
@@ -104,7 +101,6 @@ void bl2_packm_blk_var3( obj_t* beta,
diagc,
uploc,
transc,
densify,
invdiag,
revifup,
reviflo,
@@ -127,7 +123,6 @@ void PASTEMAC(ch,varname )( \
diag_t diagc, \
uplo_t uploc, \
trans_t transc, \
bool_t densify, \
bool_t invdiag, \
bool_t revifup, \
bool_t reviflo, \
@@ -335,8 +330,7 @@ void PASTEMAC(ch,varname )( \
p_use, rs_p, cs_p ); \
} \
\
/* If requested, densify the unstored part of the packed panel. */ \
if ( densify == TRUE ) \
/* Always densify the unstored part of the packed panel. */ \
{ \
doff_t diagoffp = diagoffc_i - panel_off_i; \
uplo_t uplop = uploc; \

View File

@@ -34,8 +34,7 @@
void bl2_packm_blk_var3( obj_t* beta,
obj_t* c,
obj_t* p,
packm_t* cntl );
obj_t* p );
#undef GENTPROT
@@ -47,7 +46,6 @@ void PASTEMAC(ch,varname)( \
diag_t diagc, \
uplo_t uploc, \
trans_t transc, \
bool_t densify, \
bool_t invdiag, \
bool_t revifup, \
bool_t reviflo, \

View File

@@ -48,8 +48,15 @@ void bl2_packm_init( obj_t* a,
// 3. cast only: Not yet supported / not used.
// 4. no-op: The control tree sometimes directs us to skip the
// pack operation entirely. Alias p to a and return.
obj_t c;
num_t datatype;
bool_t needs_densify;
invdiag_t invert_diag;
pack_t pack_schema;
packord_t pack_ord_if_up;
packord_t pack_ord_if_lo;
dim_t mult_m;
dim_t mult_n;
obj_t c;
// Check parameters.
if ( bl2_error_checking_is_enabled() )
@@ -116,15 +123,229 @@ void bl2_packm_init( obj_t* a,
bl2_obj_alias_to( *a, c );
}
// Extract various fields from the control tree and pass them in
// explicitly into _init_pack(). This allows external code generators
// the option of bypassing usage of control trees altogether.
datatype = bl2_obj_datatype( *a );
needs_densify = cntl_does_densify( cntl );
pack_schema = cntl_pack_schema( cntl );
mult_m = bl2_blksz_for_type( datatype, cntl_mult_m( cntl ) );
mult_n = bl2_blksz_for_type( datatype, cntl_mult_n( cntl ) );
if ( cntl_does_invert_diag( cntl ) ) invert_diag = BLIS_INVERT_DIAG;
else invert_diag = BLIS_NO_INVERT_DIAG;
if ( cntl_rev_iter_if_upper( cntl ) ) pack_ord_if_up = BLIS_PACK_REV_IF_UPPER;
else pack_ord_if_up = BLIS_PACK_FWD_IF_UPPER;
if ( cntl_rev_iter_if_lower( cntl ) ) pack_ord_if_lo = BLIS_PACK_REV_IF_LOWER;
else pack_ord_if_lo = BLIS_PACK_FWD_IF_LOWER;
// Initialize object p for the final packed matrix.
bl2_packm_init_pack( &c,
p,
cntl );
bl2_packm_init_pack( needs_densify,
invert_diag,
pack_schema,
pack_ord_if_up,
pack_ord_if_lo,
mult_m,
mult_n,
&c,
p );
// Now p is ready to be packed.
}
// Initialize object p so that it describes the packed copy of matrix c.
//
// Parameters:
//   densify         if nonzero, the uplo field of p is set to BLIS_DENSE.
//   invert_diag     value stored in p's invert-diagonal info bit.
//   pack_schema     pack schema recorded in p; also selects which stride
//                   layout is computed below.
//   pack_ord_if_up  value stored in p's "pack order if upper" info bit.
//   pack_ord_if_lo  value stored in p's "pack order if lower" info bit.
//   mult_m, mult_n  dimension multiples, forwarded to
//                   bl2_obj_set_buffer_with_cached_packm_mem(); mult_m is
//                   also used as the panel length for row-panel packing and
//                   mult_n as the panel width for column-panel packing.
//   c               intermediate object to be packed (read only here).
//   p               object to initialize; overwritten with an alias of c
//                   and then adjusted as described below.
//
// No control tree is consulted: the caller (bl2_packm_init()) extracts all
// of the above from the control tree node and passes them in explicitly.
void bl2_packm_init_pack( bool_t densify,
invdiag_t invert_diag,
pack_t pack_schema,
packord_t pack_ord_if_up,
packord_t pack_ord_if_lo,
dim_t mult_m,
dim_t mult_n,
obj_t* c,
obj_t* p )
{
// In this function, we initialize an object p to represent the packed
// copy of the intermediate object c. At this point, the datatype of
// object c should be equal to the target datatype of the original
// object, either because:
// (1) c is set up to contain the typecast of the original object, or
// (2) c is aliased to the original object, which would only happen
// when the original object's datatype and target datatype are
// equal.
// So here, we want to create an object p that is identical to c, except
// that:
// (1) the dimensions of p are explicitly transposed, if c needs
// transposition,
// (2) if c needs transposition, we negate the diagonal offset of p
// and, if c is upper- or lower-stored, toggle the uplo of p;
// independently of transposition, if we are going to densify,
// the uplo of p is set to dense,
// (3) the view offset of p is reset to (0,0),
// (4) object p contains a pack schema field that reflects its desired
// packing, along with the invert-diagonal and pack-order bits,
// (5) object p's main buffer is set to a new memory region acquired
// from the memory manager, or extracted from p if a mem entry is
// already available. (After acquiring a mem entry from the memory
// manager, it is cached within p for quick access later on.)
// (6) object p gets new stride information based on the pack schema
// passed in as pack_schema.
trans_t transc = bl2_obj_trans_status( *c );
dim_t m_c = bl2_obj_length( *c );
dim_t n_c = bl2_obj_width( *c );
inc_t rs_p, cs_p;
// We begin by copying the basic fields of c.
bl2_obj_alias_to( *c, *p );
// Update the dimension fields to explicitly reflect a transposition,
// if needed.
// Then, clear the conjugation and transposition fields from the object
// since matrix packing in BLIS is deemed to take care of all conjugation
// and transposition necessary.
// Then, we adjust the properties of p when c needs a transposition.
// We negate the diagonal offset, and if c is upper- or lower-stored,
// we toggle the uplo of p.
// Finally, if we are going to densify c, we mark p as dense. This is
// done last so that it overrides any toggled uplo value.
bl2_obj_set_dims_with_trans( transc, m_c, n_c, *p );
bl2_obj_set_conjtrans( BLIS_NO_TRANSPOSE, *p );
if ( bl2_does_trans( transc ) )
{
bl2_obj_negate_diag_offset( *p );
if ( bl2_obj_is_upper_or_lower( *c ) )
bl2_obj_toggle_uplo( *p );
}
if ( densify ) bl2_obj_set_uplo( BLIS_DENSE, *p );
// Reset the view offsets to (0,0).
bl2_obj_set_offs( 0, 0, *p );
// Set the invert diagonal field, so that the packm variants can read it
// from p instead of from a control tree.
bl2_obj_set_invert_diag( invert_diag, *p );
// Set the pack status of p to the pack schema passed in by the caller
// (bl2_packm_init() reads it from the control tree node).
bl2_obj_set_pack_schema( pack_schema, *p );
// Set the packing order bits (one for the upper-stored case, one for
// the lower-stored case).
bl2_obj_set_pack_order_if_upper( pack_ord_if_up, *p );
bl2_obj_set_pack_order_if_lower( pack_ord_if_lo, *p );
// Check the mem_t entry of p associated with the pack buffer. If it is
// NULL, then acquire memory sufficient to hold the object data and cache
// it to p. (Otherwise, if it is non-NULL, then memory has already been
// acquired from the memory manager and cached.) We then set the main
// buffer of p to the cached address of the pack memory.
bl2_obj_set_buffer_with_cached_packm_mem( *p, *p, mult_m, mult_n );
// Set the row and column strides of p based on the pack schema.
if ( pack_schema == BLIS_PACKED_ROWS )
{
mem_t* mem;
// Access the mem_t entry cached in p.
mem = bl2_obj_pack_mem( *p );
// For regular row storage, the n dimension used when acquiring the
// pack memory should be used for our row stride, with the column
// stride set to one. By using the WIDTH of the mem_t region, we
// allow for zero-padding (if necessary/desired) along the right
// edge of the matrix.
rs_p = bl2_mem_width( mem );
cs_p = 1;
bl2_obj_set_incs( rs_p, cs_p, *p );
}
else if ( pack_schema == BLIS_PACKED_COLUMNS )
{
mem_t* mem;
// Access the mem_t entry cached in p.
mem = bl2_obj_pack_mem( *p );
// For regular column storage, the m dimension used when acquiring the
// pack memory should be used for our column stride, with the row
// stride set to one. By using the LENGTH of the mem_t region, we
// allow for zero-padding (if necessary/desired) along the bottom
// edge of the matrix.
cs_p = bl2_mem_length( mem );
rs_p = 1;
bl2_obj_set_incs( rs_p, cs_p, *p );
}
else if ( pack_schema == BLIS_PACKED_ROW_PANELS )
{
mem_t* mem = bl2_obj_pack_mem( *p );
dim_t m_panel;
dim_t ps_p;
// The maximum panel length is the m dimension multiple, mult_m.
// bl2_packm_init() has already resolved this value for the datatype
// from the control tree node, so no lookup is needed here.
m_panel = mult_m;
// The "column stride" of a row panel packed object is interpreted as
// the column stride WITHIN a panel. Thus, this is equal to the panel
// length.
cs_p = m_panel;
// The "row stride" of a row panel packed object is interpreted
// as the row stride WITHIN a panel. Thus, it is unit.
rs_p = 1;
// The "panel stride" of a panel packed object is interpreted as the
// distance between the (0,0) element of panel k and the (0,0)
// element of panel k+1. We use the WIDTH of the mem_t region to
// determine the panel "width"; this will allow for zero-padding
// (if necessary/desired) along the far end of each panel (ie: the
// right edge of the matrix).
ps_p = cs_p * bl2_mem_width( mem );
// Store the strides in p.
bl2_obj_set_incs( rs_p, cs_p, *p );
bl2_obj_set_panel_stride( ps_p, *p );
}
else if ( pack_schema == BLIS_PACKED_COL_PANELS )
{
mem_t* mem = bl2_obj_pack_mem( *p );
dim_t n_panel;
dim_t ps_p;
// The maximum panel width is the n dimension multiple, mult_n.
// bl2_packm_init() has already resolved this value for the datatype
// from the control tree node, so no lookup is needed here.
n_panel = mult_n;
// The "row stride" of a column panel packed object is interpreted as
// the row stride WITHIN a panel. Thus, it is equal to the panel
// width.
rs_p = n_panel;
// The "column stride" of a column panel packed object is interpreted
// as the column stride WITHIN a panel. Thus, it is unit.
cs_p = 1;
// The "panel stride" of a panel packed object is interpreted as the
// distance between the (0,0) element of panel k and the (0,0)
// element of panel k+1. We use the LENGTH of the mem_t region to
// determine the panel "length"; this will allow for zero-padding
// (if necessary/desired) along the far end of each panel (ie: the
// bottom edge of the matrix).
ps_p = bl2_mem_length( mem ) * rs_p;
// Store the strides in p.
bl2_obj_set_incs( rs_p, cs_p, *p );
bl2_obj_set_panel_stride( ps_p, *p );
}
else
{
// If the pack schema is something else, we assume stride information
// of p is set later on, by the implementation. (rs_p and cs_p are
// left unassigned and unused on this path.)
}
}
/*
void bl2_packm_init_cast( obj_t* a,
obj_t* p,
obj_t* c )
@@ -173,187 +394,5 @@ void bl2_packm_init_cast( obj_t* a,
rs_c = 1;
bl2_obj_set_incs( rs_c, cs_c, *c );
}
void bl2_packm_init_pack( obj_t* c,
obj_t* p,
packm_t* cntl )
{
// In this function, we initialize an object p to represent the packed
// copy of the intermediate object c. At this point, the datatype of
// object c should be equal to the target datatype of the original
// object, either because:
// (1) c is set up to contain the typecast of the original object, or
// (2) c is aliased to the original object, which would only happen
// when the original object's datatype and target datatype are
// equal.
// So here, we want to create an object p that is identical to c, except
// that:
// (1) the dimensions of p are explicitly transposed, if c needs
// transposition,
// (2) if c needs transposition, we adjust the diagonal offset of p
// and we also either set the uplo of p to dense (if we are going
// to densify), or to its toggled value.
// (3) the view offset of p is reset to (0,0),
// (4) object p contains a pack schema field that reflects its desired
// packing,
// (5) object p's main buffer is set to a new memory region acquired
// from the memory manager, or extracted from p if a mem entry is
// already available, (After acquring a mem entry from the memory
// manager, it is cached within p for quick access later on.)
// (6) object p gets new stride information based on the pack schema
// embedded in the control tree node.
trans_t transc = bl2_obj_trans_status( *c );
dim_t m_c = bl2_obj_length( *c );
dim_t n_c = bl2_obj_width( *c );
pack_t pack_schema = cntl_pack_schema( cntl );
bool_t densify = cntl_does_densify( cntl );
inc_t rs_p, cs_p;
// We begin by copying the basic fields of c.
bl2_obj_alias_to( *c, *p );
// Update the dimension fields to explicitly reflect a transposition,
// if needed.
// Then, clear the conjugation and transposition fields from the object
// since matrix packing in BLIS is deemed to take care of all conjugation
// and transposition necessary.
// Then, we adjust the properties of p when c needs a transposition.
// We negate the diagonal offset, and if c is upper- or lower-stored,
// we either toggle the uplo of p.
// Finally, if we are going to densify c, we mark p as dense.
bl2_obj_set_dims_with_trans( transc, m_c, n_c, *p );
bl2_obj_set_conjtrans( BLIS_NO_TRANSPOSE, *p );
if ( bl2_does_trans( transc ) )
{
bl2_obj_negate_diag_offset( *p );
if ( bl2_obj_is_upper_or_lower( *c ) )
bl2_obj_toggle_uplo( *p );
}
if ( densify ) bl2_obj_set_uplo( BLIS_DENSE, *p );
// Reset the view offsets to (0,0).
bl2_obj_set_offs( 0, 0, *p );
// Set the pack status of p to the pack schema prescribed in the control
// tree node.
bl2_obj_set_pack_schema( pack_schema, *p );
// Check the mem_t entry of p associated with the pack buffer. If it is
// NULL, then acquire memory sufficient to hold the object data and cache
// it to p. (Otherwise, if it is non-NULL, then memory has already been
// acquired from the memory manager and cached.) We then set the main
// buffer of p to the cached address of the pack memory.
bl2_obj_set_buffer_with_cached_packm_mem( *p, *p, cntl );
// Set the row and column strides of p based on the pack schema.
if ( pack_schema == BLIS_PACKED_ROWS )
{
mem_t* mem;
// Access the mem_t entry cached in p.
mem = bl2_obj_pack_mem( *p );
// For regular row storage, the n dimension used when acquiring the
// pack memory should be used for our row stride, with the column
// stride set to one. By using the WIDTH of the mem_t region, we
// allow for zero-padding (if necessary/desired) along the right
// edge of the matrix.
rs_p = bl2_mem_width( mem );
cs_p = 1;
bl2_obj_set_incs( rs_p, cs_p, *p );
}
else if ( pack_schema == BLIS_PACKED_COLUMNS )
{
mem_t* mem;
// Access the mem_t entry cached in p.
mem = bl2_obj_pack_mem( *p );
// For regular column storage, the m dimension used when acquiring the
// pack memory should be used for our column stride, with the row
// stride set to one. By using the LENGTH of the mem_t region, we
// allow for zero-padding (if necessary/desired) along the bottom
// edge of the matrix.
cs_p = bl2_mem_length( mem );
rs_p = 1;
bl2_obj_set_incs( rs_p, cs_p, *p );
}
else if ( pack_schema == BLIS_PACKED_ROW_PANELS )
{
mem_t* mem = bl2_obj_pack_mem( *p );
blksz_t* mult_m = cntl_mult_m( cntl );
dim_t m_panel;
dim_t ps_p;
// The maximum panel length (for each datatype) should be equal to
// the m dimension multiple field of the control tree node. Here we
// access the specific value corresponding to the datatype of p.
m_panel = bl2_blksz_for_type( bl2_obj_datatype( *p ),
mult_m );
// The "column stride" of a row panel packed object is interpreted as
// the column stride WITHIN a panel. Thus, this is equal to the panel
// length.
cs_p = m_panel;
// The "row stride" of a row panel packed object is interpreted
// as the row stride WITHIN a panel. Thus, it is unit.
rs_p = 1;
// The "panel stride" of a panel packed object is interpreted as the
// distance between the (0,0) element of panel k and the (0,0)
// element of panel k+1. We use the WIDTH of the mem_t region to
// determine the panel "width"; this will allow for zero-padding
// (if necessary/desired) along the far end of each panel (ie: the
// right edge of the matrix).
ps_p = cs_p * bl2_mem_width( mem );
// Store the strides in p.
bl2_obj_set_incs( rs_p, cs_p, *p );
bl2_obj_set_panel_stride( ps_p, *p );
}
else if ( pack_schema == BLIS_PACKED_COL_PANELS )
{
mem_t* mem = bl2_obj_pack_mem( *p );
blksz_t* mult_n = cntl_mult_n( cntl );
dim_t n_panel;
dim_t ps_p;
// The maximum panel width (for each datatype) should be equal to
// the n dimension multiple field of the control tree node. Here we
// access the specific value corresponding to the datatype of p.
n_panel = bl2_blksz_for_type( bl2_obj_datatype( *p ),
mult_n );
// The "row stride" of a column panel packed object is interpreted as
// the row stride WITHIN a panel. Thus, it is equal to the panel
// width.
rs_p = n_panel;
// The "column stride" of a column panel packed object is interpreted
// as the column stride WITHIN a panel. Thus, it is unit.
cs_p = 1;
// The "panel stride" of a panel packed object is interpreted as the
// distance between the (0,0) element of panel k and the (0,0)
// element of panel k+1. We use the LENGTH of the mem_t region to
// determine the panel "length"; this will allow for zero-padding
// (if necessary/desired) along the far end of each panel (ie: the
// bottom edge of the matrix).
ps_p = bl2_mem_length( mem ) * rs_p;
// Store the strides in p.
bl2_obj_set_incs( rs_p, cs_p, *p );
bl2_obj_set_panel_stride( ps_p, *p );
}
else
{
// If the pack schema is something else, we assume stride information
// of p is set later on, by the implementation.
}
}
*/

View File

@@ -36,11 +36,18 @@ void bl2_packm_init( obj_t* a,
obj_t* p,
packm_t* cntl );
void bl2_packm_init_pack( bool_t densify,
invdiag_t invert_diag,
pack_t pack_schema,
packord_t pack_ord_if_up,
packord_t pack_ord_if_lo,
dim_t mult_m,
dim_t mult_n,
obj_t* c,
obj_t* p );
/*
void bl2_packm_init_cast( obj_t* a,
obj_t* p,
obj_t* c );
void bl2_packm_init_pack( obj_t* c,
obj_t* p,
packm_t* cntl );
*/

View File

@@ -38,13 +38,12 @@
typedef void (*FUNCPTR_T)( obj_t* beta,
obj_t* a,
obj_t* p,
packm_t* cntl );
obj_t* p );
static FUNCPTR_T vars[6][3] =
{
// unblocked optimized unblocked blocked
{ bl2_packm_unb_var1, NULL, bl2_packm_blk_var1 },
{ NULL, NULL, NULL, },
{ NULL, NULL, bl2_packm_blk_var2 },
{ NULL, NULL, bl2_packm_blk_var3 },
{ NULL, NULL, NULL, },
@@ -124,7 +123,6 @@ void bl2_packm_int( obj_t* beta,
// Invoke the variant with beta_use.
f( beta_use,
a,
p,
cntl );
p );
}

View File

@@ -150,6 +150,7 @@ void bl2_unpackm_int( obj_t* p,
*/
}
/*
void bl2_unpackm_init_cast( obj_t* p,
obj_t* a,
obj_t* c )
@@ -199,4 +200,4 @@ void bl2_unpackm_init_cast( obj_t* p,
rs_c = 1;
bl2_obj_set_incs( rs_c, cs_c, *c );
}
*/

View File

@@ -36,6 +36,8 @@ void bl2_unpackm_int( obj_t* p,
obj_t* a,
unpackm_t* cntl );
/*
void bl2_unpackm_init_cast( obj_t* p,
obj_t* a,
obj_t* c );
*/

View File

@@ -179,6 +179,18 @@
\
( ( (obj).info & BLIS_UNIT_DIAG_BIT ) == BLIS_BITVAL_UNIT_DIAG )
// Query: nonzero if the invert-diagonal bit (BLIS_INVERT_DIAG_BIT) is set
// in the info field of obj, ie: diagonal inversion was requested for the
// pack operation.
#define bl2_obj_has_inverted_diag( obj ) \
\
( ( (obj).info & BLIS_INVERT_DIAG_BIT ) == BLIS_BITVAL_INVERT_DIAG )
// Query: nonzero if the info field of obj requests reverse panel order
// when the matrix is upper-stored.
#define bl2_obj_is_pack_rev_if_upper( obj ) \
\
( ( (obj).info & BLIS_PACK_REV_IF_UPPER_BIT ) == BLIS_BITVAL_PACK_REV_IF_UPPER )
// Query: nonzero if the info field of obj requests reverse panel order
// when the matrix is lower-stored.
#define bl2_obj_is_pack_rev_if_lower( obj ) \
\
( ( (obj).info & BLIS_PACK_REV_IF_LOWER_BIT ) == BLIS_BITVAL_PACK_REV_IF_LOWER )
#define bl2_obj_pack_status( obj ) \
\
( (obj).info & BLIS_PACK_BITS )
@@ -231,6 +243,11 @@
(obj).info = ( (obj).info & ~BLIS_UNIT_DIAG_BIT ) | (diag); \
}
// Modify: clear the invert-diagonal bit of obj's info field and OR in
// inv_diag. The argument is not masked, so it is expected to be an
// invdiag_t value (BLIS_NO_INVERT_DIAG or BLIS_INVERT_DIAG); any other set
// bits would leak into neighboring info fields. Expands to a brace block,
// like the other setters in this file.
#define bl2_obj_set_invert_diag( inv_diag, obj ) \
{ \
(obj).info = ( (obj).info & ~BLIS_INVERT_DIAG_BIT ) | (inv_diag); \
}
#define bl2_obj_set_datatype( dt, obj ) \
{ \
(obj).info = ( (obj).info & ~BLIS_DATATYPE_BITS ) | (dt); \
@@ -251,6 +268,16 @@
(obj).info = ( (obj).info & ~BLIS_PACK_BITS ) | (pack); \
}
// Modify: clear the "pack order if upper" bit of obj's info field and OR in
// packordifup. The argument is not masked, so it is expected to be
// BLIS_PACK_FWD_IF_UPPER or BLIS_PACK_REV_IF_UPPER.
#define bl2_obj_set_pack_order_if_upper( packordifup, obj ) \
{ \
(obj).info = ( (obj).info & ~BLIS_PACK_REV_IF_UPPER_BIT ) | (packordifup); \
}
// Modify: clear the "pack order if lower" bit of obj's info field and OR in
// packordiflo. The argument is not masked, so it is expected to be
// BLIS_PACK_FWD_IF_LOWER or BLIS_PACK_REV_IF_LOWER.
#define bl2_obj_set_pack_order_if_lower( packordiflo, obj ) \
{ \
(obj).info = ( (obj).info & ~BLIS_PACK_REV_IF_LOWER_BIT ) | (packordiflo); \
}
#define bl2_obj_set_struc( struc, obj ) \
{ \
(obj).info = ( (obj).info & ~BLIS_STRUC_BITS ) | (struc); \
@@ -813,15 +840,13 @@ bl2_obj_width_stored( obj )
// Set an object's buffer to one previously cached for packing
// matrices (or acquire one and cache it)
#define bl2_obj_set_buffer_with_cached_packm_mem( p, obj, cntl ) \
#define bl2_obj_set_buffer_with_cached_packm_mem( p, obj, mult_m, mult_n ) \
{ \
mem_t* mem_p = bl2_obj_pack_mem( p ); \
num_t dt = bl2_obj_datatype( obj ); \
siz_t elem_size = bl2_obj_elem_size( obj ); \
dim_t m = bl2_obj_length( obj ); \
dim_t n = bl2_obj_width( obj ); \
dim_t mult_m = bl2_blksz_for_type( dt, cntl_mult_m( cntl ) ); \
dim_t mult_n = bl2_blksz_for_type( dt, cntl_mult_n( cntl ) ); \
bool_t needs_alloc; \
dim_t m_needed; \
dim_t n_needed; \
@@ -872,13 +897,12 @@ bl2_obj_width_stored( obj )
// Set an object's buffer to one previously cached for packing
// vectors (or acquire one and cache it)
#define bl2_obj_set_buffer_with_cached_packv_mem( p, obj, cntl ) \
#define bl2_obj_set_buffer_with_cached_packv_mem( p, obj, mult_m ) \
{ \
mem_t* mem_p = bl2_obj_pack_mem( p ); \
num_t dt = bl2_obj_datatype( obj ); \
siz_t elem_size = bl2_obj_elem_size( obj ); \
dim_t m = bl2_obj_vector_dim( obj ); \
dim_t mult_m = bl2_blksz_for_type( dt, cntl_mult_dim( cntl ) ); \
bool_t needs_alloc; \
dim_t m_needed; \
void* buf; \

View File

@@ -92,22 +92,25 @@ typedef dcomplex atom_t;
typedef unsigned long int info_t;
#define BLIS_DOMAIN_BIT 0x01
#define BLIS_PRECISION_BIT 0x02
#define BLIS_SPECIAL_BIT 0x04
#define BLIS_DATATYPE_BITS 0x07
#define BLIS_TRANS_BIT 0x08
#define BLIS_CONJ_BIT 0x10
#define BLIS_CONJTRANS_BITS 0x18
#define BLIS_UPPER_BIT 0x20
#define BLIS_DIAG_BIT 0x40
#define BLIS_LOWER_BIT 0x80
#define BLIS_UPLO_BITS 0xE0
#define BLIS_UNIT_DIAG_BIT 0x100
#define BLIS_TARGET_DT_BITS 0xE00
#define BLIS_EXECUTION_DT_BITS 0x7000
#define BLIS_PACK_BITS 0x38000
#define BLIS_STRUC_BITS 0xC0000
#define BLIS_DOMAIN_BIT 0x01
#define BLIS_PRECISION_BIT 0x02
#define BLIS_SPECIAL_BIT 0x04
#define BLIS_DATATYPE_BITS 0x07
#define BLIS_TRANS_BIT 0x08
#define BLIS_CONJ_BIT 0x10
#define BLIS_CONJTRANS_BITS 0x18
#define BLIS_UPPER_BIT 0x20
#define BLIS_DIAG_BIT 0x40
#define BLIS_LOWER_BIT 0x80
#define BLIS_UPLO_BITS 0xE0
#define BLIS_UNIT_DIAG_BIT 0x100
#define BLIS_INVERT_DIAG_BIT 0x200
#define BLIS_TARGET_DT_BITS 0x1C00
#define BLIS_EXECUTION_DT_BITS 0xE000
#define BLIS_PACK_BITS 0x70000
#define BLIS_PACK_REV_IF_UPPER_BIT 0x80000
#define BLIS_PACK_REV_IF_LOWER_BIT 0x100000
#define BLIS_STRUC_BITS 0x600000
#define BLIS_BITVAL_REAL 0x00
#define BLIS_BITVAL_COMPLEX 0x01
@@ -129,25 +132,30 @@ typedef unsigned long int info_t;
#define BLIS_BITVAL_DENSE 0xE0
#define BLIS_BITVAL_NONUNIT_DIAG 0x0
#define BLIS_BITVAL_UNIT_DIAG 0x100
#define BLIS_BITVAL_INVERT_DIAG 0x200
#define BLIS_BITVAL_NOT_PACKED 0x0
#define BLIS_BITVAL_PACKED_UNSPEC 0x8000
#define BLIS_BITVAL_PACKED_VECTOR 0x10000
#define BLIS_BITVAL_PACKED_ROWS 0x18000
#define BLIS_BITVAL_PACKED_COLUMNS 0x20000
#define BLIS_BITVAL_PACKED_ROW_PANELS 0x28000
#define BLIS_BITVAL_PACKED_COL_PANELS 0x30000
#define BLIS_BITVAL_PACKED_UNSPEC 0x10000
#define BLIS_BITVAL_PACKED_VECTOR 0x20000
#define BLIS_BITVAL_PACKED_ROWS 0x30000
#define BLIS_BITVAL_PACKED_COLUMNS 0x40000
#define BLIS_BITVAL_PACKED_ROW_PANELS 0x50000
#define BLIS_BITVAL_PACKED_COL_PANELS 0x60000
#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0
#define BLIS_BITVAL_PACK_REV_IF_UPPER 0x80000
#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0
#define BLIS_BITVAL_PACK_REV_IF_LOWER 0x100000
#define BLIS_BITVAL_GENERAL 0x0
#define BLIS_BITVAL_HERMITIAN 0x40000
#define BLIS_BITVAL_SYMMETRIC 0x80000
#define BLIS_BITVAL_TRIANGULAR 0xC0000
#define BLIS_BITVAL_HERMITIAN 0x200000
#define BLIS_BITVAL_SYMMETRIC 0x400000
#define BLIS_BITVAL_TRIANGULAR 0x600000
#define BLIS_TARGET_DT_SHIFT 9
#define BLIS_EXECUTION_DT_SHIFT 12
#define BLIS_TARGET_DT_SHIFT 10
#define BLIS_EXECUTION_DT_SHIFT 13
/*
info field description
13 12 11 10 F E D C B A 9 8 7 6 5 4 3 2 1 0
16 15 14 13 12 11 10 F E D C B A 9 8 7 6 5 4 3 2 1 0
bit(s) purpose
------ -------
@@ -162,15 +170,16 @@ typedef unsigned long int info_t;
- 6: diagonal
- 7: strictly lower triangular
8 Implicit unit diagonal?
B ~ 9 Target numerical datatype
- 9: domain (0 == real, 1 == complex)
- A: precision (0 == single, 1 == double)
- B: unused
E ~ C Execution numerical datatype
- C: domain (0 == real, 1 == complex)
- D: precision (0 == single, 1 == double)
- E: unused
11 ~ F Packed type/status
9 Invert diagonal required [during pack]?
C ~ A Target numerical datatype
- A: domain (0 == real, 1 == complex)
- B: precision (0 == single, 1 == double)
- C: unused
F ~ D Execution numerical datatype
- D: domain (0 == real, 1 == complex)
- E: precision (0 == single, 1 == double)
- F: unused
12 ~ 10 Packed type/status
- 0 == not packed
- 1 == packed (unspecified; row or column)
- 2 == packed vector
@@ -179,7 +188,13 @@ typedef unsigned long int info_t;
- 5 == packed by row panels
- 6 == packed by column panels
- 7 == unused
13 ~ 12 Structure type
13 Packed panel order if upper-stored
- 0 == forward order if upper
- 1 == reverse order if upper
14 Packed panel order if lower-stored
- 0 == forward order if lower
- 1 == reverse order if lower
16 ~ 15 Structure type
- (00 == general, 01 == Hermitian)
- (10 == symmetric, 11 == triangular)
*/
@@ -302,6 +317,12 @@ typedef enum
BLIS_UNIT_DIAG = BLIS_BITVAL_UNIT_DIAG
} diag_t;
// Diagonal inversion request, stored in the BLIS_INVERT_DIAG_BIT of an
// object's info field. The enumerators are the raw bit values, so they can
// be OR'ed directly into the info field by bl2_obj_set_invert_diag().
typedef enum
{
BLIS_NO_INVERT_DIAG = 0x0,
BLIS_INVERT_DIAG = BLIS_BITVAL_INVERT_DIAG
} invdiag_t;
typedef enum
{
BLIS_GENERAL = BLIS_BITVAL_GENERAL,
@@ -349,6 +370,15 @@ typedef enum
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS
} pack_t;
// Panel packing order, stored in two separate info bits: one consulted for
// upper-stored matrices and one for lower-stored matrices. The enumerators
// are the raw bit values. Note that both FWD enumerators are 0x0 and are
// therefore indistinguishable as values; which bit is affected is decided
// by the setter macro used (..._if_upper vs. ..._if_lower), not by the
// enumerator.
typedef enum
{
BLIS_PACK_FWD_IF_UPPER = BLIS_BITVAL_PACK_FWD_IF_UPPER,
BLIS_PACK_REV_IF_UPPER = BLIS_BITVAL_PACK_REV_IF_UPPER,
BLIS_PACK_FWD_IF_LOWER = BLIS_BITVAL_PACK_FWD_IF_LOWER,
BLIS_PACK_REV_IF_LOWER = BLIS_BITVAL_PACK_REV_IF_LOWER
} packord_t;
// Subpartition type

View File

@@ -1 +1 @@
0.0.2-3
0.0.2-5