From 650d2a6ff2e593151a296ca86b5214afcc747afc Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Mon, 9 Feb 2015 14:59:20 -0600 Subject: [PATCH] Added initial support for imaginary stride. Details: - Added an imaginary stride field ("is") to obj_t. - Renamed bli_obj_set_incs() macro to bli_obj_set_strides(). - Defined bli_obj_imag_stride() and bli_obj_set_imag_stride() and added invocations in key locations. - Added some basic error-checking related to imaginary stride. - For now, imaginary stride will not be exposed into the most-used BLIS APIs such as bli_obj_create(), and certainly not the computational APIs such as bli_dgemm(). --- frame/1/packv/bli_packv_init.c | 4 +- frame/1/unpackv/bli_unpackv_int.c | 2 +- frame/1m/packm/bli_packm_init.c | 10 ++--- frame/1m/unpackm/bli_unpackm_int.c | 2 +- frame/base/bli_check.c | 7 ++-- frame/base/bli_check.h | 2 +- frame/base/bli_obj.c | 59 +++++++++++++++++++++--------- frame/base/bli_obj.h | 5 ++- frame/base/bli_obj_scalar.c | 3 +- frame/base/check/bli_obj_check.c | 8 ++-- frame/base/check/bli_obj_check.h | 2 + frame/include/bli_obj_macro_defs.h | 17 ++++++++- frame/include/bli_type_defs.h | 3 ++ 13 files changed, 87 insertions(+), 37 deletions(-) diff --git a/frame/1/packv/bli_packv_init.c b/frame/1/packv/bli_packv_init.c index 8ad095051..fadf23e95 100644 --- a/frame/1/packv/bli_packv_init.c +++ b/frame/1/packv/bli_packv_init.c @@ -195,7 +195,7 @@ void bli_packv_init_pack( pack_t pack_schema, rs_p = 1; cs_p = bli_obj_padded_length( *p ); - bli_obj_set_incs( rs_p, cs_p, *p ); + bli_obj_set_strides( rs_p, cs_p, *p ); } } @@ -243,7 +243,7 @@ void bli_packv_init_cast( obj_t* a, // Update the strides. We set the increments to reflect a column storage. // Note that the column stride should never be used. 
- bli_obj_set_incs( 1, dim_a, *c ); + bli_obj_set_strides( 1, dim_a, *c ); } */ diff --git a/frame/1/unpackv/bli_unpackv_int.c b/frame/1/unpackv/bli_unpackv_int.c index f3c2cc23c..ccddecab8 100644 --- a/frame/1/unpackv/bli_unpackv_int.c +++ b/frame/1/unpackv/bli_unpackv_int.c @@ -192,7 +192,7 @@ void bli_unpackv_init_cast( obj_t* p, // though it should never be used because there is no second column to // index into (and therefore it also does not need to be aligned). bli_obj_set_dims( dim_a, 1, *c ); - bli_obj_set_incs( 1, dim_a, *c ); + bli_obj_set_strides( 1, dim_a, *c ); // Reset the view offsets to (0,0). bli_obj_set_offs( 0, 0, *c ); diff --git a/frame/1m/packm/bli_packm_init.c b/frame/1m/packm/bli_packm_init.c index 6e4723076..250c7fa3a 100644 --- a/frame/1m/packm/bli_packm_init.c +++ b/frame/1m/packm/bli_packm_init.c @@ -287,7 +287,7 @@ void bli_packm_init_pack( invdiag_t invert_diag, BLIS_HEAP_STRIDE_ALIGN_SIZE ); // Store the strides in p. - bli_obj_set_incs( rs_p, cs_p, *p ); + bli_obj_set_strides( rs_p, cs_p, *p ); // Compute the size of the packed buffer. size_p = m_p_pad * rs_p * elem_size_p; @@ -310,7 +310,7 @@ void bli_packm_init_pack( invdiag_t invert_diag, BLIS_HEAP_STRIDE_ALIGN_SIZE ); // Store the strides in p. - bli_obj_set_incs( rs_p, cs_p, *p ); + bli_obj_set_strides( rs_p, cs_p, *p ); // Compute the size of the packed buffer. size_p = cs_p * n_p_pad * elem_size_p; @@ -377,7 +377,7 @@ void bli_packm_init_pack( invdiag_t invert_diag, } // Store the strides and panel dimension in p. - bli_obj_set_incs( rs_p, cs_p, *p ); + bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_panel_dim( m_panel, *p ); bli_obj_set_panel_stride( ps_p, *p ); bli_obj_set_panel_length( m_panel, *p ); @@ -448,7 +448,7 @@ void bli_packm_init_pack( invdiag_t invert_diag, } // Store the strides and panel dimension in p. 
- bli_obj_set_incs( rs_p, cs_p, *p ); + bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_panel_dim( n_panel, *p ); bli_obj_set_panel_stride( ps_p, *p ); bli_obj_set_panel_length( m_p, *p ); @@ -552,7 +552,7 @@ void bli_packm_init_cast( obj_t* a, cs_c = bli_align_dim_to_size( m_a, elem_size_c, BLIS_HEAP_STRIDE_ALIGN_SIZE ); rs_c = 1; - bli_obj_set_incs( rs_c, cs_c, *c ); + bli_obj_set_strides( rs_c, cs_c, *c ); } */ diff --git a/frame/1m/unpackm/bli_unpackm_int.c b/frame/1m/unpackm/bli_unpackm_int.c index 893d36c41..bee104d05 100644 --- a/frame/1m/unpackm/bli_unpackm_int.c +++ b/frame/1m/unpackm/bli_unpackm_int.c @@ -213,6 +213,6 @@ void bli_unpackm_init_cast( obj_t* p, cs_c = bli_align_dim_to_size( m_a, elem_size_c, BLIS_HEAP_STRIDE_ALIGN_SIZE ); rs_c = 1; - bli_obj_set_incs( rs_c, cs_c, *c ); + bli_obj_set_strides( rs_c, cs_c, *c ); } */ diff --git a/frame/base/bli_check.c b/frame/base/bli_check.c index 838e0a937..f2cfdf85a 100644 --- a/frame/base/bli_check.c +++ b/frame/base/bli_check.c @@ -492,7 +492,7 @@ err_t bli_check_object_diag_offset_equals( obj_t* a, doff_t offset ) // -- Stride-related checks ---------------------------------------------------- -err_t bli_check_matrix_strides( dim_t m, dim_t n, inc_t rs, inc_t cs ) +err_t bli_check_matrix_strides( dim_t m, dim_t n, inc_t rs, inc_t cs, inc_t is ) { err_t e_val = BLIS_SUCCESS; @@ -509,14 +509,15 @@ err_t bli_check_matrix_strides( dim_t m, dim_t n, inc_t rs, inc_t cs ) // since the checks below are not dependent on the sign of the strides. rs = bli_abs( rs ); cs = bli_abs( cs ); + is = bli_abs( is ); // The default case (whereby we interpret rs == cs == 0 as a request for // column-major order) is handled prior to calling this function, so the // only time we should see zero strides here is if the matrix is empty. if ( m == 0 || n == 0 ) return e_val; - // Disallow either of the strides to be zero. - if ( ( rs == 0 || cs == 0 ) ) + // Disallow row, column, or imaginary strides of zero. 
+ if ( ( rs == 0 || cs == 0 || is == 0 ) ) return BLIS_INVALID_DIM_STRIDE_COMBINATION; // Check stride consistency in cases of general stride. diff --git a/frame/base/bli_check.h b/frame/base/bli_check.h index 41fbb5b64..721575ae9 100644 --- a/frame/base/bli_check.h +++ b/frame/base/bli_check.h @@ -75,7 +75,7 @@ err_t bli_check_object_width_equals( obj_t* a, dim_t n ); err_t bli_check_vector_dim_equals( obj_t* a, dim_t n ); err_t bli_check_object_diag_offset_equals( obj_t* a, doff_t offset ); -err_t bli_check_matrix_strides( dim_t m, dim_t n, inc_t rs, inc_t cs ); +err_t bli_check_matrix_strides( dim_t m, dim_t n, inc_t rs, inc_t cs, inc_t is ); err_t bli_check_general_object( obj_t* a ); err_t bli_check_hermitian_object( obj_t* a ); diff --git a/frame/base/bli_obj.c b/frame/base/bli_obj.c index ff7ed8c7a..b6277dcb3 100644 --- a/frame/base/bli_obj.c +++ b/frame/base/bli_obj.c @@ -43,7 +43,7 @@ void bli_obj_create( num_t dt, { bli_obj_create_without_buffer( dt, m, n, obj ); - bli_obj_alloc_buffer( rs, cs, obj ); + bli_obj_alloc_buffer( rs, cs, 1, obj ); } void bli_obj_create_with_attached_buffer( num_t dt, @@ -56,7 +56,7 @@ void bli_obj_create_with_attached_buffer( num_t dt, { bli_obj_create_without_buffer( dt, m, n, obj ); - bli_obj_attach_buffer( p, rs, cs, obj ); + bli_obj_attach_buffer( p, rs, cs, 1, obj ); } void bli_obj_create_without_buffer( num_t dt, @@ -112,11 +112,12 @@ void bli_obj_create_without_buffer( num_t dt, void bli_obj_alloc_buffer( inc_t rs, inc_t cs, + inc_t is, obj_t* obj ) { dim_t n_elem = 0; dim_t m, n; - inc_t rs_abs, cs_abs; + inc_t rs_abs, cs_abs, is_abs; siz_t elem_size; siz_t buffer_size; void* p; @@ -126,19 +127,21 @@ void bli_obj_alloc_buffer( inc_t rs, // Adjust the strides, if needed, before doing anything else // (particularly, before doing any error checking). 
- bli_adjust_strides( m, n, &rs, &cs ); + bli_adjust_strides( m, n, &rs, &cs, &is ); if ( bli_error_checking_is_enabled() ) - bli_obj_alloc_buffer_check( rs, cs, obj ); + bli_obj_alloc_buffer_check( rs, cs, is, obj ); // Query the size of one element. elem_size = bli_obj_elem_size( *obj ); - // Compute the magnitude of the row and column strides. We will use - // these in the comparisons below since those comparisions really - // relate only to the magnitudes of the strides, not their signs. + // Compute the magnitude of the row, column, and imaginary strides. + // We will use these in the comparisons below since those + // comparisons really relate only to the magnitudes of the strides, + // not their signs. rs_abs = bli_abs( rs ); cs_abs = bli_abs( cs ); + is_abs = bli_abs( is ); // Determine how much object to allocate. if ( m == 0 || n == 0 ) @@ -187,6 +190,19 @@ void bli_obj_alloc_buffer( inc_t rs, } } + // Handle the special case where imaginary stride is larger than + // normal. + if ( bli_obj_is_complex( *obj ) ) + { + // Notice that adding is/2 works regardless of whether the + // imaginary stride is unit, something between unit and + // 2*n_elem, or something bigger than 2*n_elem. + if ( is_abs > 1 ) + { + n_elem = is_abs / 2 + n_elem; + } + } + // Compute the size of the total buffer to be allocated, which includes // padding if the leading dimension was increased for alignment purposes. buffer_size = ( siz_t )n_elem * elem_size; @@ -196,12 +212,14 @@ void bli_obj_alloc_buffer( inc_t rs, // Set individual fields. bli_obj_set_buffer( p, *obj ); - bli_obj_set_incs( rs, cs, *obj ); + bli_obj_set_strides( rs, cs, *obj ); + bli_obj_set_imag_stride( is, *obj ); } void bli_obj_attach_buffer( void* p, inc_t rs, inc_t cs, + inc_t is, obj_t* obj ) { dim_t m, n; @@ -210,16 +228,17 @@ void bli_obj_attach_buffer( void* p, n = bli_obj_width( *obj ); // Adjust the strides, if necessary. 
- bli_adjust_strides( m, n, &rs, &cs ); + bli_adjust_strides( m, n, &rs, &cs, &is ); // Notice that we wait until after strides have been adjusted to // error-check. if ( bli_error_checking_is_enabled() ) - bli_obj_attach_buffer_check( p, rs, cs, obj ); + bli_obj_attach_buffer_check( p, rs, cs, is, obj ); // Update the object. bli_obj_set_buffer( p, *obj ); - bli_obj_set_incs( rs, cs, *obj ); + bli_obj_set_strides( rs, cs, *obj ); + bli_obj_set_imag_stride( is, *obj ); } void bli_obj_create_1x1( num_t dt, @@ -227,7 +246,7 @@ void bli_obj_create_1x1( num_t dt, { bli_obj_create_without_buffer( dt, 1, 1, obj ); - bli_obj_alloc_buffer( 1, 1, obj ); + bli_obj_alloc_buffer( 1, 1, 1, obj ); } void bli_obj_create_1x1_with_attached_buffer( num_t dt, @@ -236,7 +255,7 @@ void bli_obj_create_1x1_with_attached_buffer( num_t dt, { bli_obj_create_without_buffer( dt, 1, 1, obj ); - bli_obj_attach_buffer( p, 1, 1, obj ); + bli_obj_attach_buffer( p, 1, 1, 1, obj ); } void bli_obj_free( obj_t* obj ) @@ -341,23 +360,25 @@ void bli_obj_create_const_copy_of( obj_t* a, obj_t* b ) void bli_adjust_strides( dim_t m, dim_t n, inc_t* rs, - inc_t* cs ) + inc_t* cs, + inc_t* is ) { // Here, we check the strides that were input from the user and modify // them if needed. // Handle the special "empty" case first. If either dimension is zero, - // we set both strides to zero. + // we set row and column strides to zero. if ( m == 0 || n == 0 ) { *rs = 0; *cs = 0; + *is = 1; return; } // Interpret rs = cs = 0 as request for column storage. - if ( *rs == 0 && *cs == 0 ) + if ( *rs == 0 && *cs == 0 && ( *is == 0 || *is == 1 ) ) { // First we handle the 1x1 scalar case explicitly. if ( m == 1 && n == 1 ) @@ -377,6 +398,9 @@ void bli_adjust_strides( dim_t m, *rs = 1; *cs = m; } + + // Use default complex storage. 
+ *is = 1; } else if ( *rs == 1 && *cs == 1 ) { @@ -494,6 +518,7 @@ void bli_obj_print( char* label, obj_t* obj ) fprintf( file, " elem size %lu\n", ( unsigned long int )bli_obj_elem_size( *obj ) ); fprintf( file, " rs, cs %ld, %ld\n", ( signed long int )bli_obj_row_stride( *obj ), ( signed long int )bli_obj_col_stride( *obj ) ); + fprintf( file, " is %ld\n", ( signed long int )bli_obj_imag_stride( *obj ) ); fprintf( file, " pack_mem \n" ); fprintf( file, " - buf %p\n", ( void* )bli_mem_buffer( pack_mem ) ); fprintf( file, " - buf_type %lu\n", ( unsigned long int )bli_mem_buf_type( pack_mem ) ); diff --git a/frame/base/bli_obj.h b/frame/base/bli_obj.h index a4369e22d..316024012 100644 --- a/frame/base/bli_obj.h +++ b/frame/base/bli_obj.h @@ -56,11 +56,13 @@ void bli_obj_create_without_buffer( num_t dt, void bli_obj_alloc_buffer( inc_t rs, inc_t cs, + inc_t is, obj_t* obj ); void bli_obj_attach_buffer( void* p, inc_t rs, inc_t cs, + inc_t is, obj_t* obj ); void bli_obj_create_1x1( num_t dt, @@ -79,7 +81,8 @@ void bli_obj_create_const_copy_of( obj_t* a, obj_t* b ); void bli_adjust_strides( dim_t m, dim_t n, inc_t* rs, - inc_t* cs ); + inc_t* cs, + inc_t* is ); siz_t bli_datatype_size( num_t dt ); diff --git a/frame/base/bli_obj_scalar.c b/frame/base/bli_obj_scalar.c index f8d33957c..ad041a567 100644 --- a/frame/base/bli_obj_scalar.c +++ b/frame/base/bli_obj_scalar.c @@ -48,7 +48,8 @@ void bli_obj_scalar_init_detached( num_t dt, // Update the object. 
bli_obj_set_buffer( p, *beta ); - bli_obj_set_incs( 1, 1, *beta ); + bli_obj_set_strides( 1, 1, *beta ); + bli_obj_set_imag_stride( 1, *beta ); } void bli_obj_scalar_init_detached_copy_of( num_t dt, diff --git a/frame/base/check/bli_obj_check.c b/frame/base/check/bli_obj_check.c index d7ed185e9..a0668bb7a 100644 --- a/frame/base/check/bli_obj_check.c +++ b/frame/base/check/bli_obj_check.c @@ -46,7 +46,7 @@ void bli_obj_create_check( num_t dt, e_val = bli_check_valid_datatype( dt ); bli_check_error_code( e_val ); - e_val = bli_check_matrix_strides( m, n, rs, cs ); + e_val = bli_check_matrix_strides( m, n, rs, cs, 1 ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); @@ -69,13 +69,14 @@ void bli_obj_create_without_buffer_check( num_t dt, void bli_obj_alloc_buffer_check( inc_t rs, inc_t cs, + inc_t is, obj_t* obj ) { err_t e_val; e_val = bli_check_matrix_strides( bli_obj_length( *obj ), bli_obj_width( *obj ), - rs, cs ); + rs, cs, is ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); @@ -85,6 +86,7 @@ void bli_obj_alloc_buffer_check( inc_t rs, void bli_obj_attach_buffer_check( void* p, inc_t rs, inc_t cs, + inc_t is, obj_t* obj ) { err_t e_val; @@ -100,7 +102,7 @@ void bli_obj_attach_buffer_check( void* p, e_val = bli_check_matrix_strides( bli_obj_length( *obj ), bli_obj_width( *obj ), - rs, cs ); + rs, cs, is ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); diff --git a/frame/base/check/bli_obj_check.h b/frame/base/check/bli_obj_check.h index 68e653aaa..60beb7275 100644 --- a/frame/base/check/bli_obj_check.h +++ b/frame/base/check/bli_obj_check.h @@ -46,11 +46,13 @@ void bli_obj_create_without_buffer_check( num_t dt, void bli_obj_alloc_buffer_check( inc_t rs, inc_t cs, + inc_t is, obj_t* obj ); void bli_obj_attach_buffer_check( void* p, inc_t rs, inc_t cs, + inc_t is, obj_t* obj ); void bli_obj_create_scalar_check( num_t dt, diff --git a/frame/include/bli_obj_macro_defs.h 
b/frame/include/bli_obj_macro_defs.h index 1887eeefa..df859266f 100644 --- a/frame/include/bli_obj_macro_defs.h +++ b/frame/include/bli_obj_macro_defs.h @@ -602,6 +602,10 @@ bli_obj_width_stored( obj ) \ ((obj).cs) +#define bli_obj_imag_stride( obj ) \ +\ + ((obj).is) + #define bli_obj_row_stride_mag( obj ) \ \ ( bli_abs( bli_obj_row_stride( obj ) ) ) @@ -610,6 +614,10 @@ bli_obj_width_stored( obj ) \ ( bli_abs( bli_obj_col_stride( obj ) ) ) +#define bli_obj_imag_stride_mag( obj ) \ +\ + ( bli_abs( bli_obj_imag_stride( obj ) ) ) + // // NOTE: The following two macros differ from their non-obj counterparts // in that they do not identify m x 1 and 1 x n objects as row-stored and @@ -641,12 +649,17 @@ bli_obj_width_stored( obj ) // Stride/increment modification -#define bli_obj_set_incs( row_stride, col_stride, obj ) \ +#define bli_obj_set_strides( row_stride, col_stride, obj ) \ { \ (obj).rs = row_stride; \ (obj).cs = col_stride; \ } +#define bli_obj_set_imag_stride( imag_stride, obj ) \ +{ \ + (obj).is = imag_stride; \ +} + // Offset query @@ -998,7 +1011,7 @@ bli_obj_width_stored( obj ) doff_t diag_off = bli_obj_diag_offset( obj ); \ \ bli_obj_set_dims( n, m, obj ); \ - bli_obj_set_incs( cs, rs, obj ); \ + bli_obj_set_strides( cs, rs, obj ); \ bli_obj_set_offs( offn, offm, obj ); \ bli_obj_set_diag_offset( -diag_off, obj ); \ \ diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 050fa6ef1..a1a83bd0d 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -581,6 +581,7 @@ typedef struct obj_s void* buffer; inc_t rs; inc_t cs; + inc_t is; // Bufferless scalar storage atom_t scalar; @@ -615,6 +616,7 @@ typedef struct obj_s (b).buffer = (a).buffer; \ (b).rs = (a).rs; \ (b).cs = (a).cs; \ + (b).is = (a).is; \ \ (b).scalar = (a).scalar; \ \ @@ -657,6 +659,7 @@ typedef struct obj_s (b).buffer = (a).buffer; \ (b).rs = (a).rs; \ (b).cs = (a).cs; \ + (b).is = (a).is; \ \ (b).scalar = (a).scalar; \ \