Consolidated pack_t enums; retired VECTOR value.

Details:
- Changed the pack_t enumerations so that BLIS_PACKED_VECTOR no longer has
  its own value, and instead simply aliases to BLIS_PACKED_UNSPEC. This
  makes room in the three pack_t bits of the info field of obj_t so that
  two values are now unused, and may be used for other future purposes.
- Updated sloppy terminology usage in comments in level-2 front-ends.
  (Replaced "is contiguous" with more accurate "has unit stride".)
This commit is contained in:
Field G. Van Zee
2014-01-15 11:40:12 -06:00
parent ddc8c1c379
commit d628bf1da1
11 changed files with 147 additions and 148 deletions

View File

@@ -49,9 +49,9 @@ void bli_gemv( obj_t* alpha,
num_t dt_targ_a;
num_t dt_targ_x;
num_t dt_targ_y;
bool_t a_is_contig;
bool_t x_is_contig;
bool_t y_is_contig;
bool_t a_has_unit_inc;
bool_t x_has_unit_inc;
bool_t y_has_unit_inc;
obj_t alpha_local;
obj_t beta_local;
num_t dt_alpha;
@@ -67,11 +67,11 @@ void bli_gemv( obj_t* alpha,
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_y = bli_obj_target_datatype( *y );
// Determine whether each operand is stored contiguously.
a_is_contig = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
// Determine whether each operand is stored with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -95,11 +95,11 @@ void bli_gemv( obj_t* alpha,
&beta_local );
// If all operands are contiguous, we choose a control tree for calling
// If all operands have unit stride, we choose a control tree for calling
// the unblocked implementation directly without any blocking.
if ( a_is_contig &&
x_is_contig &&
y_is_contig )
if ( a_has_unit_inc &&
x_has_unit_inc &&
y_has_unit_inc )
{
// A row-major layout with no transpose is typically best served by
// a dot-based implementation (and the same goes for a column-major
@@ -123,9 +123,9 @@ void bli_gemv( obj_t* alpha,
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.

View File

@@ -48,9 +48,9 @@ void bli_ger( obj_t* alpha,
num_t dt_targ_x;
num_t dt_targ_y;
//num_t dt_targ_a;
bool_t x_is_contig;
bool_t y_is_contig;
bool_t a_is_contig;
bool_t x_has_unit_inc;
bool_t y_has_unit_inc;
bool_t a_has_unit_inc;
obj_t alpha_local;
num_t dt_alpha;
@@ -64,11 +64,11 @@ void bli_ger( obj_t* alpha,
dt_targ_y = bli_obj_target_datatype( *y );
//dt_targ_a = bli_obj_target_datatype( *a );
// Determine whether each operand is stored contiguously.
x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
a_is_contig = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
// Determine whether each operand is stored with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -80,11 +80,12 @@ void bli_ger( obj_t* alpha,
alpha,
&alpha_local );
// If all operands are contiguous, we choose a control tree for calling
// If all operands have unit stride, we choose a control tree for calling
// the unblocked implementation directly without any blocking.
if ( x_is_contig &&
y_is_contig &&
a_is_contig )
if ( x_has_unit_inc &&
y_has_unit_inc &&
a_has_unit_inc )
{
// Use different control trees depending on storage of the matrix
// operand.
@@ -95,9 +96,9 @@ void bli_ger( obj_t* alpha,
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.

View File

@@ -49,9 +49,9 @@ void bli_hemv( obj_t* alpha,
num_t dt_targ_a;
num_t dt_targ_x;
num_t dt_targ_y;
bool_t a_is_contig;
bool_t x_is_contig;
bool_t y_is_contig;
bool_t a_has_unit_inc;
bool_t x_has_unit_inc;
bool_t y_has_unit_inc;
obj_t alpha_local;
obj_t beta_local;
num_t dt_alpha;
@@ -67,11 +67,11 @@ void bli_hemv( obj_t* alpha,
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_y = bli_obj_target_datatype( *y );
// Determine whether each operand is stored contiguously.
a_is_contig = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
// Determine whether each operand is stored with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -95,11 +95,11 @@ void bli_hemv( obj_t* alpha,
&beta_local );
// If all operands are contiguous, we choose a control tree for calling
// If all operands have unit stride, we choose a control tree for calling
// the unblocked implementation directly without any blocking.
if ( a_is_contig &&
x_is_contig &&
y_is_contig )
if ( a_has_unit_inc &&
x_has_unit_inc &&
y_has_unit_inc )
{
// We use two control trees to handle the four cases corresponding to
// combinations of upper/lower triangular storage and row/column-storage.
@@ -120,9 +120,9 @@ void bli_hemv( obj_t* alpha,
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.

View File

@@ -46,8 +46,8 @@ void bli_her( obj_t* alpha,
her_t* her_cntl;
num_t dt_targ_x;
//num_t dt_targ_c;
bool_t x_is_contig;
bool_t c_is_contig;
bool_t x_has_unit_inc;
bool_t c_has_unit_inc;
obj_t alpha_local;
num_t dt_alpha;
@@ -60,10 +60,10 @@ void bli_her( obj_t* alpha,
dt_targ_x = bli_obj_target_datatype( *x );
//dt_targ_c = bli_obj_target_datatype( *c );
// Determine whether each operand is stored contiguously.
x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
c_is_contig = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
// Determine whether each operand is stored with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
// Create object to hold a copy-cast of alpha.
@@ -74,10 +74,10 @@ void bli_her( obj_t* alpha,
&alpha_local );
// If all operands are contiguous, we choose a control tree for calling
// If all operands have unit stride, we choose a control tree for calling
// the unblocked implementation directly without any blocking.
if ( x_is_contig &&
c_is_contig )
if ( x_has_unit_inc &&
c_has_unit_inc )
{
// We use two control trees to handle the four cases corresponding to
// combinations of upper/lower triangular storage and row/column-storage.
@@ -98,8 +98,8 @@ void bli_her( obj_t* alpha,
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.

View File

@@ -48,9 +48,9 @@ void bli_her2( obj_t* alpha,
num_t dt_targ_x;
num_t dt_targ_y;
//num_t dt_targ_c;
bool_t x_is_contig;
bool_t y_is_contig;
bool_t c_is_contig;
bool_t x_has_unit_inc;
bool_t y_has_unit_inc;
bool_t c_has_unit_inc;
obj_t alpha_local;
obj_t alpha_conj_local;
num_t dt_alpha;
@@ -65,11 +65,11 @@ void bli_her2( obj_t* alpha,
dt_targ_y = bli_obj_target_datatype( *y );
//dt_targ_c = bli_obj_target_datatype( *c );
// Determine whether each operand is stored contiguously.
x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
c_is_contig = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
// Determine whether each operand is stored with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -87,11 +87,11 @@ void bli_her2( obj_t* alpha,
&alpha_conj_local );
// If all operands are contiguous, we choose a control tree for calling
// If all operands have unit stride, we choose a control tree for calling
// the unblocked implementation directly without any blocking.
if ( x_is_contig &&
y_is_contig &&
c_is_contig )
if ( x_has_unit_inc &&
y_has_unit_inc &&
c_has_unit_inc )
{
// We use two control trees to handle the four cases corresponding to
// combinations of upper/lower triangular storage and row/column-storage.
@@ -112,9 +112,9 @@ void bli_her2( obj_t* alpha,
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.

View File

@@ -49,9 +49,9 @@ void bli_symv( obj_t* alpha,
num_t dt_targ_a;
num_t dt_targ_x;
num_t dt_targ_y;
bool_t a_is_contig;
bool_t x_is_contig;
bool_t y_is_contig;
bool_t a_has_unit_inc;
bool_t x_has_unit_inc;
bool_t y_has_unit_inc;
obj_t alpha_local;
obj_t beta_local;
num_t dt_alpha;
@@ -67,11 +67,11 @@ void bli_symv( obj_t* alpha,
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_y = bli_obj_target_datatype( *y );
// Determine whether each operand is stored contiguously.
a_is_contig = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
// Determine whether each operand is stored with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -95,11 +95,11 @@ void bli_symv( obj_t* alpha,
&beta_local );
// If all operands are contiguous, we choose a control tree for calling
// If all operands have unit stride, we choose a control tree for calling
// the unblocked implementation directly without any blocking.
if ( a_is_contig &&
x_is_contig &&
y_is_contig )
if ( a_has_unit_inc &&
x_has_unit_inc &&
y_has_unit_inc )
{
// We use two control trees to handle the four cases corresponding to
// combinations of upper/lower triangular storage and row/column-storage.
@@ -120,9 +120,9 @@ void bli_symv( obj_t* alpha,
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.

View File

@@ -46,8 +46,8 @@ void bli_syr( obj_t* alpha,
her_t* her_cntl;
num_t dt_targ_x;
num_t dt_targ_c;
bool_t x_is_contig;
bool_t c_is_contig;
bool_t x_has_unit_inc;
bool_t c_has_unit_inc;
obj_t alpha_local;
num_t dt_alpha;
@@ -60,10 +60,10 @@ void bli_syr( obj_t* alpha,
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_c = bli_obj_target_datatype( *c );
// Determine whether each operand is stored contiguously.
x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
c_is_contig = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
// Determine whether each operand is stored with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -76,10 +76,10 @@ void bli_syr( obj_t* alpha,
&alpha_local );
// If all operands are contiguous, we choose a control tree for calling
// If all operands have unit stride, we choose a control tree for calling
// the unblocked implementation directly without any blocking.
if ( x_is_contig &&
c_is_contig )
if ( x_has_unit_inc &&
c_has_unit_inc )
{
// We use two control trees to handle the four cases corresponding to
// combinations of upper/lower triangular storage and row/column-storage.
@@ -100,8 +100,8 @@ void bli_syr( obj_t* alpha,
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.

View File

@@ -48,9 +48,9 @@ void bli_syr2( obj_t* alpha,
num_t dt_targ_x;
num_t dt_targ_y;
//num_t dt_targ_c;
bool_t x_is_contig;
bool_t y_is_contig;
bool_t c_is_contig;
bool_t x_has_unit_inc;
bool_t y_has_unit_inc;
bool_t c_has_unit_inc;
obj_t alpha_local;
num_t dt_alpha;
@@ -64,11 +64,11 @@ void bli_syr2( obj_t* alpha,
dt_targ_y = bli_obj_target_datatype( *y );
//dt_targ_c = bli_obj_target_datatype( *c );
// Determine whether each operand is stored contiguously.
x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
c_is_contig = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
// Determine whether each operand is stored with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -80,11 +80,11 @@ void bli_syr2( obj_t* alpha,
&alpha_local );
// If all operands are contiguous, we choose a control tree for calling
// If all operands have unit stride, we choose a control tree for calling
// the unblocked implementation directly without any blocking.
if ( x_is_contig &&
y_is_contig &&
c_is_contig )
if ( x_has_unit_inc &&
y_has_unit_inc &&
c_has_unit_inc )
{
// We use two control trees to handle the four cases corresponding to
// combinations of upper/lower triangular storage and row/column-storage.
@@ -105,9 +105,9 @@ void bli_syr2( obj_t* alpha,
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.

View File

@@ -46,8 +46,8 @@ void bli_trmv( obj_t* alpha,
trmv_t* trmv_cntl;
num_t dt_targ_a;
num_t dt_targ_x;
bool_t a_is_contig;
bool_t x_is_contig;
bool_t a_has_unit_inc;
bool_t x_has_unit_inc;
obj_t alpha_local;
num_t dt_alpha;
@@ -60,10 +60,10 @@ void bli_trmv( obj_t* alpha,
dt_targ_a = bli_obj_target_datatype( *a );
dt_targ_x = bli_obj_target_datatype( *x );
// Determine whether each operand is stored contiguously.
a_is_contig = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
// Determine whether each operand is stored with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -75,10 +75,11 @@ void bli_trmv( obj_t* alpha,
alpha,
&alpha_local );
// If all operands are contiguous, we choose a control tree for calling
// If all operands have unit stride, we choose a control tree for calling
// the unblocked implementation directly without any blocking.
if ( a_is_contig &&
x_is_contig )
if ( a_has_unit_inc &&
x_has_unit_inc )
{
// We use two control trees to handle the four cases corresponding to
// combinations of transposition and row/column-storage.
@@ -99,8 +100,8 @@ void bli_trmv( obj_t* alpha,
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.

View File

@@ -46,8 +46,8 @@ void bli_trsv( obj_t* alpha,
trsv_t* trsv_cntl;
num_t dt_targ_a;
num_t dt_targ_x;
bool_t a_is_contig;
bool_t x_is_contig;
bool_t a_has_unit_inc;
bool_t x_has_unit_inc;
obj_t alpha_local;
num_t dt_alpha;
@@ -60,10 +60,10 @@ void bli_trsv( obj_t* alpha,
dt_targ_a = bli_obj_datatype( *a );
dt_targ_x = bli_obj_datatype( *x );
// Determine whether each operand is stored contiguously.
a_is_contig = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
// Determine whether each operand is stored with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -76,10 +76,10 @@ void bli_trsv( obj_t* alpha,
&alpha_local );
// If all operands are contiguous, we choose a control tree for calling
// If all operands have unit stride, we choose a control tree for calling
// the unblocked implementation directly without any blocking.
if ( a_is_contig &&
x_is_contig )
if ( a_has_unit_inc &&
x_has_unit_inc )
{
if ( bli_obj_has_notrans( *a ) )
{
@@ -96,8 +96,8 @@ void bli_trsv( obj_t* alpha,
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.

View File

@@ -240,12 +240,10 @@ typedef struct
#define BLIS_BITVAL_INVERT_DIAG 0x200
#define BLIS_BITVAL_NOT_PACKED 0x0
#define BLIS_BITVAL_PACKED_UNSPEC 0x10000
#define BLIS_BITVAL_PACKED_VECTOR 0x20000
#define BLIS_BITVAL_PACKED_ROWS 0x30000
#define BLIS_BITVAL_PACKED_COLUMNS 0x40000
#define BLIS_BITVAL_PACKED_ROW_PANELS 0x50000
#define BLIS_BITVAL_PACKED_COL_PANELS 0x60000
#define BLIS_BITVAL_PACKED_BLOCKS 0x70000
#define BLIS_BITVAL_PACKED_ROWS 0x20000
#define BLIS_BITVAL_PACKED_COLUMNS 0x30000
#define BLIS_BITVAL_PACKED_ROW_PANELS 0x40000
#define BLIS_BITVAL_PACKED_COL_PANELS 0x50000
#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0
#define BLIS_BITVAL_PACK_REV_IF_UPPER 0x80000
#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0
@@ -350,12 +348,11 @@ typedef enum
{
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_VECTOR,
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
BLIS_PACKED_BLOCKS = BLIS_BITVAL_PACKED_BLOCKS
} pack_t;
@@ -449,12 +446,12 @@ typedef struct blksz_s
- 15: unused
18 ~ 16 Packed type/status
- 0 == not packed
- 1 == packed (unspecified; row or column)
- 2 == packed vector
- 3 == packed by rows
- 4 == packed by columns
- 5 == packed by row panels
- 6 == packed by column panels
- 1 == packed (unspecified; row, column, or vector)
- 2 == packed by rows
- 3 == packed by columns
- 4 == packed by row panels
- 5 == packed by column panels
- 6 == unused
- 7 == unused
19 Packed panel order if upper-stored
- 0 == forward order if upper