mirror of
https://github.com/amd/blis.git
Consolidated pack_t enums; retired VECTOR value.
Details:
- Changed the pack_t enumerations so that BLIS_PACKED_VECTOR no longer has its own value, and instead simply aliases to BLIS_PACKED_UNSPEC. This makes room in the three pack_t bits of the info field of obj_t, so that two values are now unused and may be used for other future purposes.
- Updated sloppy terminology in comments in the level-2 front-ends. (Replaced "is contiguous" with the more accurate "has unit stride".)
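The mechanics of the consolidation are easy to see in miniature. The following sketch is illustrative rather than BLIS source: it condenses the bit values and enumerators from the diff below (the BLOCKS schema is omitted), and the main() driver exists only to demonstrate the aliasing. Since the packed type/status occupies bits 18~16 of the obj_t info field, three bits yield eight encodings; aliasing BLIS_PACKED_VECTOR to BLIS_PACKED_UNSPEC is what frees two of them.

#include <stdio.h>

/* Bit values as defined after this commit; the packed type/status
   occupies bits 18~16 of the obj_t info field. */
#define BLIS_BITVAL_NOT_PACKED        0x0
#define BLIS_BITVAL_PACKED_UNSPEC     0x10000
#define BLIS_BITVAL_PACKED_ROWS       0x20000
#define BLIS_BITVAL_PACKED_COLUMNS    0x30000
#define BLIS_BITVAL_PACKED_ROW_PANELS 0x40000
#define BLIS_BITVAL_PACKED_COL_PANELS 0x50000

typedef enum
{
    BLIS_NOT_PACKED        = BLIS_BITVAL_NOT_PACKED,
    BLIS_PACKED_UNSPEC     = BLIS_BITVAL_PACKED_UNSPEC,
    /* VECTOR no longer has its own encoding; it aliases UNSPEC. */
    BLIS_PACKED_VECTOR     = BLIS_BITVAL_PACKED_UNSPEC,
    BLIS_PACKED_ROWS       = BLIS_BITVAL_PACKED_ROWS,
    BLIS_PACKED_COLUMNS    = BLIS_BITVAL_PACKED_COLUMNS,
    BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
    BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS
} pack_t;

int main( void )
{
    /* Aliased enumerators compare equal: code that sets or tests
       BLIS_PACKED_VECTOR now matches BLIS_PACKED_UNSPEC too. */
    printf( "vector aliases unspec: %d\n",
            BLIS_PACKED_VECTOR == BLIS_PACKED_UNSPEC );

    /* Encodings 6 (0x60000) and 7 (0x70000) of the three-bit field
       are now unassigned, available for future pack schemas. */
    printf( "highest assigned encoding: %d\n",
            BLIS_PACKED_COL_PANELS >> 16 );
    return 0;
}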
@@ -49,9 +49,9 @@ void bli_gemv( obj_t* alpha,
   num_t  dt_targ_a;
   num_t  dt_targ_x;
   num_t  dt_targ_y;
-  bool_t a_is_contig;
-  bool_t x_is_contig;
-  bool_t y_is_contig;
+  bool_t a_has_unit_inc;
+  bool_t x_has_unit_inc;
+  bool_t y_has_unit_inc;
   obj_t  alpha_local;
   obj_t  beta_local;
   num_t  dt_alpha;
@@ -67,11 +67,11 @@ void bli_gemv( obj_t* alpha,
   dt_targ_x = bli_obj_target_datatype( *x );
   dt_targ_y = bli_obj_target_datatype( *y );

-  // Determine whether each operand is stored contiguously.
-  a_is_contig = ( bli_obj_is_row_stored( *a ) ||
-                  bli_obj_is_col_stored( *a ) );
-  x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
-  y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
+  // Determine whether each operand is stored with unit stride.
+  a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
+                     bli_obj_is_col_stored( *a ) );
+  x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
+  y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );


   // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -95,11 +95,11 @@ void bli_gemv( obj_t* alpha,
                      &beta_local );


-  // If all operands are contiguous, we choose a control tree for calling
+  // If all operands have unit stride, we choose a control tree for calling
   // the unblocked implementation directly without any blocking.
-  if ( a_is_contig &&
-       x_is_contig &&
-       y_is_contig )
+  if ( a_has_unit_inc &&
+       x_has_unit_inc &&
+       y_has_unit_inc )
   {
      // A row-major layout with no transpose is typically best served by
      // a dot-based implementation (and the same goes for a column-major
@@ -123,9 +123,9 @@ void bli_gemv( obj_t* alpha,
   {
      // Mark objects with unit stride as already being packed. This prevents
      // unnecessary packing from happening within the blocked algorithm.
-     if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
-     if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
-     if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
+     if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
+     if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
+     if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );

      // Here, we make a similar choice as above, except that (1) we look
      // at storage tilt, and (2) we choose a tree that performs blocking.
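Every level-2 front-end in this commit follows the same shape seen in bli_gemv above: if every operand has unit stride, run the unblocked control tree; otherwise mark the unit-stride operands as already packed and run the blocked tree. The sketch below is a paraphrase of that control flow with made-up stand-in types and helpers (operand_t, has_unit_inc, run_unblocked, run_blocked are not the BLIS API):

/* Hypothetical stand-ins; the real code operates on obj_t via
   bli_obj_* queries and control-tree objects. */
typedef enum { NOT_PACKED, PACKED_UNSPEC, PACKED_VECTOR } schema_t;

typedef struct
{
    int      rs, cs;   /* row and column strides */
    schema_t schema;   /* pack status, as in the obj_t info field */
} operand_t;

/* Unit stride: row-stored (cs == 1) or column-stored (rs == 1).
   For a vector operand this reduces to a unit increment. */
static int has_unit_inc( const operand_t* op )
{
    return ( op->rs == 1 || op->cs == 1 );
}

void gemv_dispatch( operand_t* a, operand_t* x, operand_t* y )
{
    if ( has_unit_inc( a ) && has_unit_inc( x ) && has_unit_inc( y ) )
    {
        /* All operands have unit stride: call the unblocked
           implementation directly, without any blocking. */
        /* run_unblocked( a, x, y ); */
    }
    else
    {
        /* Mark unit-stride operands as already packed so the blocked
           algorithm does not copy them unnecessarily. */
        if ( has_unit_inc( a ) ) a->schema = PACKED_UNSPEC;
        if ( has_unit_inc( x ) ) x->schema = PACKED_VECTOR;
        if ( has_unit_inc( y ) ) y->schema = PACKED_VECTOR;
        /* run_blocked( a, x, y ); */
    }
}

Note that after this commit the two schema values assigned in the else branch are the same underlying value, which is precisely what allows the VECTOR encoding to be retired without changing this dispatch logic.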
@@ -48,9 +48,9 @@ void bli_ger( obj_t* alpha,
   num_t  dt_targ_x;
   num_t  dt_targ_y;
   //num_t dt_targ_a;
-  bool_t x_is_contig;
-  bool_t y_is_contig;
-  bool_t a_is_contig;
+  bool_t x_has_unit_inc;
+  bool_t y_has_unit_inc;
+  bool_t a_has_unit_inc;
   obj_t  alpha_local;
   num_t  dt_alpha;

@@ -64,11 +64,11 @@ void bli_ger( obj_t* alpha,
   dt_targ_y = bli_obj_target_datatype( *y );
   //dt_targ_a = bli_obj_target_datatype( *a );

-  // Determine whether each operand is stored contiguously.
-  x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
-  y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
-  a_is_contig = ( bli_obj_is_row_stored( *a ) ||
-                  bli_obj_is_col_stored( *a ) );
+  // Determine whether each operand is stored with unit stride.
+  x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
+  y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
+  a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
+                     bli_obj_is_col_stored( *a ) );


   // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -80,11 +80,12 @@ void bli_ger( obj_t* alpha,
                      alpha,
                      &alpha_local );

-  // If all operands are contiguous, we choose a control tree for calling
+
+  // If all operands have unit stride, we choose a control tree for calling
   // the unblocked implementation directly without any blocking.
-  if ( x_is_contig &&
-       y_is_contig &&
-       a_is_contig )
+  if ( x_has_unit_inc &&
+       y_has_unit_inc &&
+       a_has_unit_inc )
   {
      // Use different control trees depending on storage of the matrix
      // operand.
@@ -95,9 +96,9 @@ void bli_ger( obj_t* alpha,
   {
      // Mark objects with unit stride as already being packed. This prevents
      // unnecessary packing from happening within the blocked algorithm.
-     if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
-     if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
-     if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
+     if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
+     if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
+     if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );

      // Here, we make a similar choice as above, except that (1) we look
      // at storage tilt, and (2) we choose a tree that performs blocking.
@@ -49,9 +49,9 @@ void bli_hemv( obj_t* alpha,
   num_t  dt_targ_a;
   num_t  dt_targ_x;
   num_t  dt_targ_y;
-  bool_t a_is_contig;
-  bool_t x_is_contig;
-  bool_t y_is_contig;
+  bool_t a_has_unit_inc;
+  bool_t x_has_unit_inc;
+  bool_t y_has_unit_inc;
   obj_t  alpha_local;
   obj_t  beta_local;
   num_t  dt_alpha;
@@ -67,11 +67,11 @@ void bli_hemv( obj_t* alpha,
   dt_targ_x = bli_obj_target_datatype( *x );
   dt_targ_y = bli_obj_target_datatype( *y );

-  // Determine whether each operand is stored contiguously.
-  a_is_contig = ( bli_obj_is_row_stored( *a ) ||
-                  bli_obj_is_col_stored( *a ) );
-  x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
-  y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
+  // Determine whether each operand is stored with unit stride.
+  a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
+                     bli_obj_is_col_stored( *a ) );
+  x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
+  y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );


   // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -95,11 +95,11 @@ void bli_hemv( obj_t* alpha,
                      &beta_local );


-  // If all operands are contiguous, we choose a control tree for calling
+  // If all operands have unit stride, we choose a control tree for calling
   // the unblocked implementation directly without any blocking.
-  if ( a_is_contig &&
-       x_is_contig &&
-       y_is_contig )
+  if ( a_has_unit_inc &&
+       x_has_unit_inc &&
+       y_has_unit_inc )
   {
      // We use two control trees to handle the four cases corresponding to
      // combinations of upper/lower triangular storage and row/column-storage.
@@ -120,9 +120,9 @@ void bli_hemv( obj_t* alpha,
   {
      // Mark objects with unit stride as already being packed. This prevents
      // unnecessary packing from happening within the blocked algorithm.
-     if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
-     if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
-     if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
+     if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
+     if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
+     if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );

      // Here, we make a similar choice as above, except that (1) we look
      // at storage tilt, and (2) we choose a tree that performs blocking.
@@ -46,8 +46,8 @@ void bli_her( obj_t* alpha,
   her_t* her_cntl;
   num_t  dt_targ_x;
   //num_t dt_targ_c;
-  bool_t x_is_contig;
-  bool_t c_is_contig;
+  bool_t x_has_unit_inc;
+  bool_t c_has_unit_inc;
   obj_t  alpha_local;
   num_t  dt_alpha;

@@ -60,10 +60,10 @@ void bli_her( obj_t* alpha,
   dt_targ_x = bli_obj_target_datatype( *x );
   //dt_targ_c = bli_obj_target_datatype( *c );

-  // Determine whether each operand is stored contiguously.
-  x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
-  c_is_contig = ( bli_obj_is_row_stored( *c ) ||
-                  bli_obj_is_col_stored( *c ) );
+  // Determine whether each operand is stored with unit stride.
+  x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
+  c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
+                     bli_obj_is_col_stored( *c ) );


   // Create object to hold a copy-cast of alpha.
@@ -74,10 +74,10 @@ void bli_her( obj_t* alpha,
                      &alpha_local );


-  // If all operands are contiguous, we choose a control tree for calling
+  // If all operands have unit stride, we choose a control tree for calling
   // the unblocked implementation directly without any blocking.
-  if ( x_is_contig &&
-       c_is_contig )
+  if ( x_has_unit_inc &&
+       c_has_unit_inc )
   {
      // We use two control trees to handle the four cases corresponding to
      // combinations of upper/lower triangular storage and row/column-storage.
@@ -98,8 +98,8 @@ void bli_her( obj_t* alpha,
   {
      // Mark objects with unit stride as already being packed. This prevents
      // unnecessary packing from happening within the blocked algorithm.
-     if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
-     if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
+     if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
+     if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );

      // Here, we make a similar choice as above, except that (1) we look
      // at storage tilt, and (2) we choose a tree that performs blocking.
@@ -48,9 +48,9 @@ void bli_her2( obj_t* alpha,
   num_t  dt_targ_x;
   num_t  dt_targ_y;
   //num_t dt_targ_c;
-  bool_t x_is_contig;
-  bool_t y_is_contig;
-  bool_t c_is_contig;
+  bool_t x_has_unit_inc;
+  bool_t y_has_unit_inc;
+  bool_t c_has_unit_inc;
   obj_t  alpha_local;
   obj_t  alpha_conj_local;
   num_t  dt_alpha;
@@ -65,11 +65,11 @@ void bli_her2( obj_t* alpha,
   dt_targ_y = bli_obj_target_datatype( *y );
   //dt_targ_c = bli_obj_target_datatype( *c );

-  // Determine whether each operand is stored contiguously.
-  x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
-  y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
-  c_is_contig = ( bli_obj_is_row_stored( *c ) ||
-                  bli_obj_is_col_stored( *c ) );
+  // Determine whether each operand is stored with unit stride.
+  x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
+  y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
+  c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
+                     bli_obj_is_col_stored( *c ) );


   // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -87,11 +87,11 @@ void bli_her2( obj_t* alpha,
                      &alpha_conj_local );


-  // If all operands are contiguous, we choose a control tree for calling
+  // If all operands have unit stride, we choose a control tree for calling
   // the unblocked implementation directly without any blocking.
-  if ( x_is_contig &&
-       y_is_contig &&
-       c_is_contig )
+  if ( x_has_unit_inc &&
+       y_has_unit_inc &&
+       c_has_unit_inc )
   {
      // We use two control trees to handle the four cases corresponding to
      // combinations of upper/lower triangular storage and row/column-storage.
@@ -112,9 +112,9 @@ void bli_her2( obj_t* alpha,
   {
      // Mark objects with unit stride as already being packed. This prevents
      // unnecessary packing from happening within the blocked algorithm.
-     if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
-     if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
-     if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
+     if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
+     if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
+     if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );

      // Here, we make a similar choice as above, except that (1) we look
      // at storage tilt, and (2) we choose a tree that performs blocking.
@@ -49,9 +49,9 @@ void bli_symv( obj_t* alpha,
   num_t  dt_targ_a;
   num_t  dt_targ_x;
   num_t  dt_targ_y;
-  bool_t a_is_contig;
-  bool_t x_is_contig;
-  bool_t y_is_contig;
+  bool_t a_has_unit_inc;
+  bool_t x_has_unit_inc;
+  bool_t y_has_unit_inc;
   obj_t  alpha_local;
   obj_t  beta_local;
   num_t  dt_alpha;
@@ -67,11 +67,11 @@ void bli_symv( obj_t* alpha,
   dt_targ_x = bli_obj_target_datatype( *x );
   dt_targ_y = bli_obj_target_datatype( *y );

-  // Determine whether each operand is stored contiguously.
-  a_is_contig = ( bli_obj_is_row_stored( *a ) ||
-                  bli_obj_is_col_stored( *a ) );
-  x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
-  y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
+  // Determine whether each operand is stored with unit stride.
+  a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
+                     bli_obj_is_col_stored( *a ) );
+  x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
+  y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );


   // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -95,11 +95,11 @@ void bli_symv( obj_t* alpha,
                      &beta_local );


-  // If all operands are contiguous, we choose a control tree for calling
+  // If all operands have unit stride, we choose a control tree for calling
   // the unblocked implementation directly without any blocking.
-  if ( a_is_contig &&
-       x_is_contig &&
-       y_is_contig )
+  if ( a_has_unit_inc &&
+       x_has_unit_inc &&
+       y_has_unit_inc )
   {
      // We use two control trees to handle the four cases corresponding to
      // combinations of upper/lower triangular storage and row/column-storage.
@@ -120,9 +120,9 @@ void bli_symv( obj_t* alpha,
   {
      // Mark objects with unit stride as already being packed. This prevents
      // unnecessary packing from happening within the blocked algorithm.
-     if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
-     if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
-     if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
+     if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
+     if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
+     if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );

      // Here, we make a similar choice as above, except that (1) we look
      // at storage tilt, and (2) we choose a tree that performs blocking.
@@ -46,8 +46,8 @@ void bli_syr( obj_t* alpha,
   her_t* her_cntl;
   num_t  dt_targ_x;
   num_t  dt_targ_c;
-  bool_t x_is_contig;
-  bool_t c_is_contig;
+  bool_t x_has_unit_inc;
+  bool_t c_has_unit_inc;
   obj_t  alpha_local;
   num_t  dt_alpha;

@@ -60,10 +60,10 @@ void bli_syr( obj_t* alpha,
   dt_targ_x = bli_obj_target_datatype( *x );
   dt_targ_c = bli_obj_target_datatype( *c );

-  // Determine whether each operand is stored contiguously.
-  x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
-  c_is_contig = ( bli_obj_is_row_stored( *c ) ||
-                  bli_obj_is_col_stored( *c ) );
+  // Determine whether each operand is stored with unit stride.
+  x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
+  c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
+                     bli_obj_is_col_stored( *c ) );


   // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -76,10 +76,10 @@ void bli_syr( obj_t* alpha,
                      &alpha_local );


-  // If all operands are contiguous, we choose a control tree for calling
+  // If all operands have unit stride, we choose a control tree for calling
   // the unblocked implementation directly without any blocking.
-  if ( x_is_contig &&
-       c_is_contig )
+  if ( x_has_unit_inc &&
+       c_has_unit_inc )
   {
      // We use two control trees to handle the four cases corresponding to
      // combinations of upper/lower triangular storage and row/column-storage.
@@ -100,8 +100,8 @@ void bli_syr( obj_t* alpha,
   {
      // Mark objects with unit stride as already being packed. This prevents
      // unnecessary packing from happening within the blocked algorithm.
-     if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
-     if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
+     if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
+     if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );

      // Here, we make a similar choice as above, except that (1) we look
      // at storage tilt, and (2) we choose a tree that performs blocking.
@@ -48,9 +48,9 @@ void bli_syr2( obj_t* alpha,
   num_t  dt_targ_x;
   num_t  dt_targ_y;
   //num_t dt_targ_c;
-  bool_t x_is_contig;
-  bool_t y_is_contig;
-  bool_t c_is_contig;
+  bool_t x_has_unit_inc;
+  bool_t y_has_unit_inc;
+  bool_t c_has_unit_inc;
   obj_t  alpha_local;
   num_t  dt_alpha;

@@ -64,11 +64,11 @@ void bli_syr2( obj_t* alpha,
   dt_targ_y = bli_obj_target_datatype( *y );
   //dt_targ_c = bli_obj_target_datatype( *c );

-  // Determine whether each operand is stored contiguously.
-  x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
-  y_is_contig = ( bli_obj_vector_inc( *y ) == 1 );
-  c_is_contig = ( bli_obj_is_row_stored( *c ) ||
-                  bli_obj_is_col_stored( *c ) );
+  // Determine whether each operand is stored with unit stride.
+  x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
+  y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
+  c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
+                     bli_obj_is_col_stored( *c ) );


   // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -80,11 +80,11 @@ void bli_syr2( obj_t* alpha,
                      &alpha_local );


-  // If all operands are contiguous, we choose a control tree for calling
+  // If all operands have unit stride, we choose a control tree for calling
   // the unblocked implementation directly without any blocking.
-  if ( x_is_contig &&
-       y_is_contig &&
-       c_is_contig )
+  if ( x_has_unit_inc &&
+       y_has_unit_inc &&
+       c_has_unit_inc )
   {
      // We use two control trees to handle the four cases corresponding to
      // combinations of upper/lower triangular storage and row/column-storage.
@@ -105,9 +105,9 @@ void bli_syr2( obj_t* alpha,
   {
      // Mark objects with unit stride as already being packed. This prevents
      // unnecessary packing from happening within the blocked algorithm.
-     if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
-     if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
-     if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
+     if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
+     if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
+     if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );

      // Here, we make a similar choice as above, except that (1) we look
      // at storage tilt, and (2) we choose a tree that performs blocking.
@@ -46,8 +46,8 @@ void bli_trmv( obj_t* alpha,
   trmv_t* trmv_cntl;
   num_t   dt_targ_a;
   num_t   dt_targ_x;
-  bool_t  a_is_contig;
-  bool_t  x_is_contig;
+  bool_t  a_has_unit_inc;
+  bool_t  x_has_unit_inc;
   obj_t   alpha_local;
   num_t   dt_alpha;

@@ -60,10 +60,10 @@ void bli_trmv( obj_t* alpha,
   dt_targ_a = bli_obj_target_datatype( *a );
   dt_targ_x = bli_obj_target_datatype( *x );

-  // Determine whether each operand is stored contiguously.
-  a_is_contig = ( bli_obj_is_row_stored( *a ) ||
-                  bli_obj_is_col_stored( *a ) );
-  x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
+  // Determine whether each operand is stored with unit stride.
+  a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
+                     bli_obj_is_col_stored( *a ) );
+  x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );


   // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -75,10 +75,11 @@ void bli_trmv( obj_t* alpha,
                      alpha,
                      &alpha_local );

-  // If all operands are contiguous, we choose a control tree for calling
+
+  // If all operands have unit stride, we choose a control tree for calling
   // the unblocked implementation directly without any blocking.
-  if ( a_is_contig &&
-       x_is_contig )
+  if ( a_has_unit_inc &&
+       x_has_unit_inc )
   {
      // We use two control trees to handle the four cases corresponding to
      // combinations of transposition and row/column-storage.
@@ -99,8 +100,8 @@ void bli_trmv( obj_t* alpha,
   {
      // Mark objects with unit stride as already being packed. This prevents
      // unnecessary packing from happening within the blocked algorithm.
-     if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
-     if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
+     if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
+     if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );

      // Here, we make a similar choice as above, except that (1) we look
      // at storage tilt, and (2) we choose a tree that performs blocking.
@@ -46,8 +46,8 @@ void bli_trsv( obj_t* alpha,
   trsv_t* trsv_cntl;
   num_t   dt_targ_a;
   num_t   dt_targ_x;
-  bool_t  a_is_contig;
-  bool_t  x_is_contig;
+  bool_t  a_has_unit_inc;
+  bool_t  x_has_unit_inc;
   obj_t   alpha_local;
   num_t   dt_alpha;

@@ -60,10 +60,10 @@ void bli_trsv( obj_t* alpha,
   dt_targ_a = bli_obj_datatype( *a );
   dt_targ_x = bli_obj_datatype( *x );

-  // Determine whether each operand is stored contiguously.
-  a_is_contig = ( bli_obj_is_row_stored( *a ) ||
-                  bli_obj_is_col_stored( *a ) );
-  x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );
+  // Determine whether each operand is stored with unit stride.
+  a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
+                     bli_obj_is_col_stored( *a ) );
+  x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );


   // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -76,10 +76,10 @@ void bli_trsv( obj_t* alpha,
                      &alpha_local );


-  // If all operands are contiguous, we choose a control tree for calling
+  // If all operands have unit stride, we choose a control tree for calling
   // the unblocked implementation directly without any blocking.
-  if ( a_is_contig &&
-       x_is_contig )
+  if ( a_has_unit_inc &&
+       x_has_unit_inc )
   {
      if ( bli_obj_has_notrans( *a ) )
      {
@@ -96,8 +96,8 @@ void bli_trsv( obj_t* alpha,
   {
      // Mark objects with unit stride as already being packed. This prevents
      // unnecessary packing from happening within the blocked algorithm.
-     if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
-     if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
+     if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
+     if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );

      // Here, we make a similar choice as above, except that (1) we look
      // at storage tilt, and (2) we choose a tree that performs blocking.
@@ -240,12 +240,10 @@ typedef struct
 #define BLIS_BITVAL_INVERT_DIAG       0x200
 #define BLIS_BITVAL_NOT_PACKED        0x0
 #define BLIS_BITVAL_PACKED_UNSPEC     0x10000
-#define BLIS_BITVAL_PACKED_VECTOR     0x20000
-#define BLIS_BITVAL_PACKED_ROWS       0x30000
-#define BLIS_BITVAL_PACKED_COLUMNS    0x40000
-#define BLIS_BITVAL_PACKED_ROW_PANELS 0x50000
-#define BLIS_BITVAL_PACKED_COL_PANELS 0x60000
-#define BLIS_BITVAL_PACKED_BLOCKS     0x70000
+#define BLIS_BITVAL_PACKED_ROWS       0x20000
+#define BLIS_BITVAL_PACKED_COLUMNS    0x30000
+#define BLIS_BITVAL_PACKED_ROW_PANELS 0x40000
+#define BLIS_BITVAL_PACKED_COL_PANELS 0x50000
 #define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0
 #define BLIS_BITVAL_PACK_REV_IF_UPPER 0x80000
 #define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0
@@ -350,12 +348,11 @@ typedef enum
 {
   BLIS_NOT_PACKED        = BLIS_BITVAL_NOT_PACKED,
   BLIS_PACKED_UNSPEC     = BLIS_BITVAL_PACKED_UNSPEC,
-  BLIS_PACKED_VECTOR     = BLIS_BITVAL_PACKED_VECTOR,
+  BLIS_PACKED_VECTOR     = BLIS_BITVAL_PACKED_UNSPEC,
   BLIS_PACKED_ROWS       = BLIS_BITVAL_PACKED_ROWS,
   BLIS_PACKED_COLUMNS    = BLIS_BITVAL_PACKED_COLUMNS,
   BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
   BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
-  BLIS_PACKED_BLOCKS     = BLIS_BITVAL_PACKED_BLOCKS
 } pack_t;

@@ -449,12 +446,12 @@ typedef struct blksz_s
        - 15: unused
   18 ~ 16  Packed type/status
        - 0 == not packed
-       - 1 == packed (unspecified; row or column)
-       - 2 == packed vector
-       - 3 == packed by rows
-       - 4 == packed by columns
-       - 5 == packed by row panels
-       - 6 == packed by column panels
-       - 7 == packed by blocks
+       - 1 == packed (unspecified; row, column, or vector)
+       - 2 == packed by rows
+       - 3 == packed by columns
+       - 4 == packed by row panels
+       - 5 == packed by column panels
+       - 6 == unused
+       - 7 == unused
   19       Packed panel order if upper-stored
        - 0 == forward order if upper
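To make the revised bit layout concrete, here is a small sketch of decoding the packed type/status field under the new numbering. It is not BLIS code: the PACK_SHIFT and PACK_MASK names and the decode helper are hypothetical, with only the bit positions (18~16) and the encoding values taken from the documentation above.

#include <stdio.h>

/* Hypothetical names; only the bit positions and encoding values
   come from the info-field layout documented above. */
#define PACK_SHIFT 16
#define PACK_MASK  ( 0x7 << PACK_SHIFT )

static const char* decode_pack_bits( unsigned int info )
{
    switch ( ( info & PACK_MASK ) >> PACK_SHIFT )
    {
        case 0:  return "not packed";
        case 1:  return "packed (unspecified; row, column, or vector)";
        case 2:  return "packed by rows";
        case 3:  return "packed by columns";
        case 4:  return "packed by row panels";
        case 5:  return "packed by column panels";
        default: return "unused encoding"; /* 6 and 7 are now free */
    }
}

int main( void )
{
    /* 0x20000 is BLIS_BITVAL_PACKED_ROWS under the new numbering. */
    printf( "%s\n", decode_pack_bits( 0x20000 ) );
    return 0;
}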