From d628bf1da1560f1f5126a1ddfed8714f0a4b8da3 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Wed, 15 Jan 2014 11:40:12 -0600 Subject: [PATCH] Consolidated pack_t enums; retired VECTOR value. Details: - Changed the pack_t enumerations so that BLIS_PACKED_VECTOR no longer has its own value, and instead simply aliases to BLIS_PACKED_UNSPEC. This makes room in the three pack_t bits of the info field of obj_t so that two values are now unused, and may be used for other future purposes. - Updated sloppy terminology usage in comments in level-2 front-ends. (Replaced "is contiguous" with more accurate "has unit stride".) --- frame/2/gemv/bli_gemv.c | 30 +++++++++++++++--------------- frame/2/ger/bli_ger.c | 31 ++++++++++++++++--------------- frame/2/hemv/bli_hemv.c | 30 +++++++++++++++--------------- frame/2/her/bli_her.c | 22 +++++++++++----------- frame/2/her2/bli_her2.c | 30 +++++++++++++++--------------- frame/2/symv/bli_symv.c | 30 +++++++++++++++--------------- frame/2/syr/bli_syr.c | 22 +++++++++++----------- frame/2/syr2/bli_syr2.c | 30 +++++++++++++++--------------- frame/2/trmv/bli_trmv.c | 23 ++++++++++++----------- frame/2/trsv/bli_trsv.c | 22 +++++++++++----------- frame/include/bli_type_defs.h | 25 +++++++++++-------------- 11 files changed, 147 insertions(+), 148 deletions(-) diff --git a/frame/2/gemv/bli_gemv.c b/frame/2/gemv/bli_gemv.c index cceca9661..d932610ad 100644 --- a/frame/2/gemv/bli_gemv.c +++ b/frame/2/gemv/bli_gemv.c @@ -49,9 +49,9 @@ void bli_gemv( obj_t* alpha, num_t dt_targ_a; num_t dt_targ_x; num_t dt_targ_y; - bool_t a_is_contig; - bool_t x_is_contig; - bool_t y_is_contig; + bool_t a_has_unit_inc; + bool_t x_has_unit_inc; + bool_t y_has_unit_inc; obj_t alpha_local; obj_t beta_local; num_t dt_alpha; @@ -67,11 +67,11 @@ void bli_gemv( obj_t* alpha, dt_targ_x = bli_obj_target_datatype( *x ); dt_targ_y = bli_obj_target_datatype( *y ); - // Determine whether each operand is stored contiguously. 
- a_is_contig = ( bli_obj_is_row_stored( *a ) || - bli_obj_is_col_stored( *a ) ); - x_is_contig = ( bli_obj_vector_inc( *x ) == 1 ); - y_is_contig = ( bli_obj_vector_inc( *y ) == 1 ); + // Determine whether each operand is stored with unit stride. + a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || + bli_obj_is_col_stored( *a ) ); + x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); + y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); // Create an object to hold a copy-cast of alpha. Notice that we use @@ -95,11 +95,11 @@ void bli_gemv( obj_t* alpha, &beta_local ); - // If all operands are contiguous, we choose a control tree for calling + // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. - if ( a_is_contig && - x_is_contig && - y_is_contig ) + if ( a_has_unit_inc && + x_has_unit_inc && + y_has_unit_inc ) { // A row-major layout with no transpose is typically best served by // a dot-based implementation (and the same goes for a column-major @@ -123,9 +123,9 @@ void bli_gemv( obj_t* alpha, { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. - if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); - if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); - if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); + if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); + if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); + if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. 
diff --git a/frame/2/ger/bli_ger.c b/frame/2/ger/bli_ger.c index 4677faaf0..02b2e27e5 100644 --- a/frame/2/ger/bli_ger.c +++ b/frame/2/ger/bli_ger.c @@ -48,9 +48,9 @@ void bli_ger( obj_t* alpha, num_t dt_targ_x; num_t dt_targ_y; //num_t dt_targ_a; - bool_t x_is_contig; - bool_t y_is_contig; - bool_t a_is_contig; + bool_t x_has_unit_inc; + bool_t y_has_unit_inc; + bool_t a_has_unit_inc; obj_t alpha_local; num_t dt_alpha; @@ -64,11 +64,11 @@ void bli_ger( obj_t* alpha, dt_targ_y = bli_obj_target_datatype( *y ); //dt_targ_a = bli_obj_target_datatype( *a ); - // Determine whether each operand is stored contiguously. - x_is_contig = ( bli_obj_vector_inc( *x ) == 1 ); - y_is_contig = ( bli_obj_vector_inc( *y ) == 1 ); - a_is_contig = ( bli_obj_is_row_stored( *a ) || - bli_obj_is_col_stored( *a ) ); + // Determine whether each operand is stored with unit stride. + x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); + y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); + a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || + bli_obj_is_col_stored( *a ) ); // Create an object to hold a copy-cast of alpha. Notice that we use @@ -80,11 +80,12 @@ void bli_ger( obj_t* alpha, alpha, &alpha_local ); - // If all operands are contiguous, we choose a control tree for calling + + // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. - if ( x_is_contig && - y_is_contig && - a_is_contig ) + if ( x_has_unit_inc && + y_has_unit_inc && + a_has_unit_inc ) { // Use different control trees depending on storage of the matrix // operand. @@ -95,9 +96,9 @@ void bli_ger( obj_t* alpha, { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. 
- if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); - if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); - if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); + if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); + if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); + if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. diff --git a/frame/2/hemv/bli_hemv.c b/frame/2/hemv/bli_hemv.c index 879f514d0..a20453072 100644 --- a/frame/2/hemv/bli_hemv.c +++ b/frame/2/hemv/bli_hemv.c @@ -49,9 +49,9 @@ void bli_hemv( obj_t* alpha, num_t dt_targ_a; num_t dt_targ_x; num_t dt_targ_y; - bool_t a_is_contig; - bool_t x_is_contig; - bool_t y_is_contig; + bool_t a_has_unit_inc; + bool_t x_has_unit_inc; + bool_t y_has_unit_inc; obj_t alpha_local; obj_t beta_local; num_t dt_alpha; @@ -67,11 +67,11 @@ void bli_hemv( obj_t* alpha, dt_targ_x = bli_obj_target_datatype( *x ); dt_targ_y = bli_obj_target_datatype( *y ); - // Determine whether each operand is stored contiguously. - a_is_contig = ( bli_obj_is_row_stored( *a ) || - bli_obj_is_col_stored( *a ) ); - x_is_contig = ( bli_obj_vector_inc( *x ) == 1 ); - y_is_contig = ( bli_obj_vector_inc( *y ) == 1 ); + // Determine whether each operand is stored with unit stride. + a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || + bli_obj_is_col_stored( *a ) ); + x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); + y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); // Create an object to hold a copy-cast of alpha. Notice that we use @@ -95,11 +95,11 @@ void bli_hemv( obj_t* alpha, &beta_local ); - // If all operands are contiguous, we choose a control tree for calling + // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. 
- if ( a_is_contig && - x_is_contig && - y_is_contig ) + if ( a_has_unit_inc && + x_has_unit_inc && + y_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. @@ -120,9 +120,9 @@ void bli_hemv( obj_t* alpha, { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. - if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); - if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); - if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); + if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); + if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); + if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. diff --git a/frame/2/her/bli_her.c b/frame/2/her/bli_her.c index 3d1333702..640e96eb5 100644 --- a/frame/2/her/bli_her.c +++ b/frame/2/her/bli_her.c @@ -46,8 +46,8 @@ void bli_her( obj_t* alpha, her_t* her_cntl; num_t dt_targ_x; //num_t dt_targ_c; - bool_t x_is_contig; - bool_t c_is_contig; + bool_t x_has_unit_inc; + bool_t c_has_unit_inc; obj_t alpha_local; num_t dt_alpha; @@ -60,10 +60,10 @@ void bli_her( obj_t* alpha, dt_targ_x = bli_obj_target_datatype( *x ); //dt_targ_c = bli_obj_target_datatype( *c ); - // Determine whether each operand is stored contiguously. - x_is_contig = ( bli_obj_vector_inc( *x ) == 1 ); - c_is_contig = ( bli_obj_is_row_stored( *c ) || - bli_obj_is_col_stored( *c ) ); + // Determine whether each operand is stored with unit stride. + x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); + c_has_unit_inc = ( bli_obj_is_row_stored( *c ) || + bli_obj_is_col_stored( *c ) ); // Create object to hold a copy-cast of alpha. 
@@ -74,10 +74,10 @@ void bli_her( obj_t* alpha, &alpha_local ); - // If all operands are contiguous, we choose a control tree for calling + // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. - if ( x_is_contig && - c_is_contig ) + if ( x_has_unit_inc && + c_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. @@ -98,8 +98,8 @@ void bli_her( obj_t* alpha, { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. - if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); - if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); + if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); + if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. diff --git a/frame/2/her2/bli_her2.c b/frame/2/her2/bli_her2.c index b86590110..f8455bfc7 100644 --- a/frame/2/her2/bli_her2.c +++ b/frame/2/her2/bli_her2.c @@ -48,9 +48,9 @@ void bli_her2( obj_t* alpha, num_t dt_targ_x; num_t dt_targ_y; //num_t dt_targ_c; - bool_t x_is_contig; - bool_t y_is_contig; - bool_t c_is_contig; + bool_t x_has_unit_inc; + bool_t y_has_unit_inc; + bool_t c_has_unit_inc; obj_t alpha_local; obj_t alpha_conj_local; num_t dt_alpha; @@ -65,11 +65,11 @@ void bli_her2( obj_t* alpha, dt_targ_y = bli_obj_target_datatype( *y ); //dt_targ_c = bli_obj_target_datatype( *c ); - // Determine whether each operand is stored contiguously. 
- x_is_contig = ( bli_obj_vector_inc( *x ) == 1 ); - y_is_contig = ( bli_obj_vector_inc( *y ) == 1 ); - c_is_contig = ( bli_obj_is_row_stored( *c ) || - bli_obj_is_col_stored( *c ) ); + // Determine whether each operand is stored with unit stride. + x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); + y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); + c_has_unit_inc = ( bli_obj_is_row_stored( *c ) || + bli_obj_is_col_stored( *c ) ); // Create an object to hold a copy-cast of alpha. Notice that we use @@ -87,11 +87,11 @@ void bli_her2( obj_t* alpha, &alpha_conj_local ); - // If all operands are contiguous, we choose a control tree for calling + // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. - if ( x_is_contig && - y_is_contig && - c_is_contig ) + if ( x_has_unit_inc && + y_has_unit_inc && + c_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. @@ -112,9 +112,9 @@ void bli_her2( obj_t* alpha, { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. - if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); - if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); - if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); + if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); + if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); + if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. 
diff --git a/frame/2/symv/bli_symv.c b/frame/2/symv/bli_symv.c index 19ca83868..873fa41f8 100644 --- a/frame/2/symv/bli_symv.c +++ b/frame/2/symv/bli_symv.c @@ -49,9 +49,9 @@ void bli_symv( obj_t* alpha, num_t dt_targ_a; num_t dt_targ_x; num_t dt_targ_y; - bool_t a_is_contig; - bool_t x_is_contig; - bool_t y_is_contig; + bool_t a_has_unit_inc; + bool_t x_has_unit_inc; + bool_t y_has_unit_inc; obj_t alpha_local; obj_t beta_local; num_t dt_alpha; @@ -67,11 +67,11 @@ void bli_symv( obj_t* alpha, dt_targ_x = bli_obj_target_datatype( *x ); dt_targ_y = bli_obj_target_datatype( *y ); - // Determine whether each operand is stored contiguously. - a_is_contig = ( bli_obj_is_row_stored( *a ) || - bli_obj_is_col_stored( *a ) ); - x_is_contig = ( bli_obj_vector_inc( *x ) == 1 ); - y_is_contig = ( bli_obj_vector_inc( *y ) == 1 ); + // Determine whether each operand is stored with unit stride. + a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || + bli_obj_is_col_stored( *a ) ); + x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); + y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); // Create an object to hold a copy-cast of alpha. Notice that we use @@ -95,11 +95,11 @@ void bli_symv( obj_t* alpha, &beta_local ); - // If all operands are contiguous, we choose a control tree for calling + // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. - if ( a_is_contig && - x_is_contig && - y_is_contig ) + if ( a_has_unit_inc && + x_has_unit_inc && + y_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. @@ -120,9 +120,9 @@ void bli_symv( obj_t* alpha, { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. 
- if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); - if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); - if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); + if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); + if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); + if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. diff --git a/frame/2/syr/bli_syr.c b/frame/2/syr/bli_syr.c index fe72cfd5d..454db0466 100644 --- a/frame/2/syr/bli_syr.c +++ b/frame/2/syr/bli_syr.c @@ -46,8 +46,8 @@ void bli_syr( obj_t* alpha, her_t* her_cntl; num_t dt_targ_x; num_t dt_targ_c; - bool_t x_is_contig; - bool_t c_is_contig; + bool_t x_has_unit_inc; + bool_t c_has_unit_inc; obj_t alpha_local; num_t dt_alpha; @@ -60,10 +60,10 @@ void bli_syr( obj_t* alpha, dt_targ_x = bli_obj_target_datatype( *x ); dt_targ_c = bli_obj_target_datatype( *c ); - // Determine whether each operand is stored contiguously. - x_is_contig = ( bli_obj_vector_inc( *x ) == 1 ); - c_is_contig = ( bli_obj_is_row_stored( *c ) || - bli_obj_is_col_stored( *c ) ); + // Determine whether each operand is stored with unit stride. + x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); + c_has_unit_inc = ( bli_obj_is_row_stored( *c ) || + bli_obj_is_col_stored( *c ) ); // Create an object to hold a copy-cast of alpha. Notice that we use @@ -76,10 +76,10 @@ void bli_syr( obj_t* alpha, &alpha_local ); - // If all operands are contiguous, we choose a control tree for calling + // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. 
- if ( x_is_contig && - c_is_contig ) + if ( x_has_unit_inc && + c_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. @@ -100,8 +100,8 @@ void bli_syr( obj_t* alpha, { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. - if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); - if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); + if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); + if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. diff --git a/frame/2/syr2/bli_syr2.c b/frame/2/syr2/bli_syr2.c index 985b9b5b2..4a053e360 100644 --- a/frame/2/syr2/bli_syr2.c +++ b/frame/2/syr2/bli_syr2.c @@ -48,9 +48,9 @@ void bli_syr2( obj_t* alpha, num_t dt_targ_x; num_t dt_targ_y; //num_t dt_targ_c; - bool_t x_is_contig; - bool_t y_is_contig; - bool_t c_is_contig; + bool_t x_has_unit_inc; + bool_t y_has_unit_inc; + bool_t c_has_unit_inc; obj_t alpha_local; num_t dt_alpha; @@ -64,11 +64,11 @@ void bli_syr2( obj_t* alpha, dt_targ_y = bli_obj_target_datatype( *y ); //dt_targ_c = bli_obj_target_datatype( *c ); - // Determine whether each operand is stored contiguously. - x_is_contig = ( bli_obj_vector_inc( *x ) == 1 ); - y_is_contig = ( bli_obj_vector_inc( *y ) == 1 ); - c_is_contig = ( bli_obj_is_row_stored( *c ) || - bli_obj_is_col_stored( *c ) ); + // Determine whether each operand is stored with unit stride. + x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); + y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); + c_has_unit_inc = ( bli_obj_is_row_stored( *c ) || + bli_obj_is_col_stored( *c ) ); // Create an object to hold a copy-cast of alpha. 
Notice that we use @@ -80,11 +80,11 @@ void bli_syr2( obj_t* alpha, &alpha_local ); - // If all operands are contiguous, we choose a control tree for calling + // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. - if ( x_is_contig && - y_is_contig && - c_is_contig ) + if ( x_has_unit_inc && + y_has_unit_inc && + c_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. @@ -105,9 +105,9 @@ void bli_syr2( obj_t* alpha, { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. - if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); - if ( y_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); - if ( c_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); + if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); + if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); + if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. diff --git a/frame/2/trmv/bli_trmv.c b/frame/2/trmv/bli_trmv.c index 4e7b87698..65742c8bb 100644 --- a/frame/2/trmv/bli_trmv.c +++ b/frame/2/trmv/bli_trmv.c @@ -46,8 +46,8 @@ void bli_trmv( obj_t* alpha, trmv_t* trmv_cntl; num_t dt_targ_a; num_t dt_targ_x; - bool_t a_is_contig; - bool_t x_is_contig; + bool_t a_has_unit_inc; + bool_t x_has_unit_inc; obj_t alpha_local; num_t dt_alpha; @@ -60,10 +60,10 @@ void bli_trmv( obj_t* alpha, dt_targ_a = bli_obj_target_datatype( *a ); dt_targ_x = bli_obj_target_datatype( *x ); - // Determine whether each operand is stored contiguously. 
- a_is_contig = ( bli_obj_is_row_stored( *a ) || - bli_obj_is_col_stored( *a ) ); - x_is_contig = ( bli_obj_vector_inc( *x ) == 1 ); + // Determine whether each operand is stored with unit stride. + a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || + bli_obj_is_col_stored( *a ) ); + x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); // Create an object to hold a copy-cast of alpha. Notice that we use @@ -75,10 +75,11 @@ void bli_trmv( obj_t* alpha, alpha, &alpha_local ); - // If all operands are contiguous, we choose a control tree for calling + + // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. - if ( a_is_contig && - x_is_contig ) + if ( a_has_unit_inc && + x_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of transposition and row/column-storage. @@ -99,8 +100,8 @@ void bli_trmv( obj_t* alpha, { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. - if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); - if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); + if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); + if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. 
diff --git a/frame/2/trsv/bli_trsv.c b/frame/2/trsv/bli_trsv.c index b68c4e113..e6d0ec66b 100644 --- a/frame/2/trsv/bli_trsv.c +++ b/frame/2/trsv/bli_trsv.c @@ -46,8 +46,8 @@ void bli_trsv( obj_t* alpha, trsv_t* trsv_cntl; num_t dt_targ_a; num_t dt_targ_x; - bool_t a_is_contig; - bool_t x_is_contig; + bool_t a_has_unit_inc; + bool_t x_has_unit_inc; obj_t alpha_local; num_t dt_alpha; @@ -60,10 +60,10 @@ void bli_trsv( obj_t* alpha, dt_targ_a = bli_obj_datatype( *a ); dt_targ_x = bli_obj_datatype( *x ); - // Determine whether each operand is stored contiguously. - a_is_contig = ( bli_obj_is_row_stored( *a ) || - bli_obj_is_col_stored( *a ) ); - x_is_contig = ( bli_obj_vector_inc( *x ) == 1 ); + // Determine whether each operand is stored with unit stride. + a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || + bli_obj_is_col_stored( *a ) ); + x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); // Create an object to hold a copy-cast of alpha. Notice that we use @@ -76,10 +76,10 @@ void bli_trsv( obj_t* alpha, &alpha_local ); - // If all operands are contiguous, we choose a control tree for calling + // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. - if ( a_is_contig && - x_is_contig ) + if ( a_has_unit_inc && + x_has_unit_inc ) { if ( bli_obj_has_notrans( *a ) ) { @@ -96,8 +96,8 @@ void bli_trsv( obj_t* alpha, { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. - if ( a_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); - if ( x_is_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); + if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); + if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. 
diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 705bd8a49..1b0f0b08f 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -240,12 +240,10 @@ typedef struct #define BLIS_BITVAL_INVERT_DIAG 0x200 #define BLIS_BITVAL_NOT_PACKED 0x0 #define BLIS_BITVAL_PACKED_UNSPEC 0x10000 -#define BLIS_BITVAL_PACKED_VECTOR 0x20000 -#define BLIS_BITVAL_PACKED_ROWS 0x30000 -#define BLIS_BITVAL_PACKED_COLUMNS 0x40000 -#define BLIS_BITVAL_PACKED_ROW_PANELS 0x50000 -#define BLIS_BITVAL_PACKED_COL_PANELS 0x60000 -#define BLIS_BITVAL_PACKED_BLOCKS 0x70000 +#define BLIS_BITVAL_PACKED_ROWS 0x20000 +#define BLIS_BITVAL_PACKED_COLUMNS 0x30000 +#define BLIS_BITVAL_PACKED_ROW_PANELS 0x40000 +#define BLIS_BITVAL_PACKED_COL_PANELS 0x50000 #define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0 #define BLIS_BITVAL_PACK_REV_IF_UPPER 0x80000 #define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0 @@ -350,12 +348,11 @@ typedef enum { BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED, BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC, - BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_VECTOR, + BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC, BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS, BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS, BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS, BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS, - BLIS_PACKED_BLOCKS = BLIS_BITVAL_PACKED_BLOCKS } pack_t; @@ -449,12 +446,12 @@ typedef struct blksz_s - 15: unused 18 ~ 16 Packed type/status - 0 == not packed - - 1 == packed (unspecified; row or column) - - 2 == packed vector - - 3 == packed by rows - - 4 == packed by columns - - 5 == packed by row panels - - 6 == packed by column panels + - 1 == packed (unspecified; row, column, or vector) + - 2 == packed by rows + - 3 == packed by columns + - 4 == packed by row panels + - 5 == packed by column panels + - 6 == unused - 7 == unused 19 Packed panel order if upper-stored - 0 == forward order if upper