From 30e5eb29e060b97752f702d2ea5d101d950f53b2 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Fri, 13 Nov 2015 12:14:19 -0600 Subject: [PATCH] Minor changes to treatment of rs, cs in bli_obj.c. Details: - Applied a patch submitted by Devin Matthews that: - implements subtle changes to handling of somewhat unusual cases of row and column strides to accommodate certail tensor cases, which includes adding dimension parameters to _is_col_tilted() and _is_row_tilted() macros, - simplifies how buffers are sized when requested BLIS-allocated objects, - re-consolidates bli_adjust_strides_*() into one function, and - defines 'restrict' keyword as a "nothing" macro for C++ and pre-C99 environments. --- frame/base/bli_check.c | 6 +- frame/base/bli_obj.c | 140 ++++++++------------------- frame/base/bli_obj.h | 16 ++- frame/include/bli_macro_defs.h | 4 + frame/include/bli_param_macro_defs.h | 16 +-- 5 files changed, 65 insertions(+), 117 deletions(-) diff --git a/frame/base/bli_check.c b/frame/base/bli_check.c index f2cfdf85a..d333a4065 100644 --- a/frame/base/bli_check.c +++ b/frame/base/bli_check.c @@ -550,8 +550,10 @@ err_t bli_check_matrix_strides( dim_t m, dim_t n, inc_t rs, inc_t cs, inc_t is ) { if ( rs == 1 && cs == 1 ) { - // Only allow rs == cs == 1 for scalars. - if ( m > 1 || n > 1 ) + // If rs == cs == 1, then we must be dealing with an m-by-1, a + // 1-by-n, or a 1-by-1 matrix and thus at least one of the + // dimensions, m or n, must be unit (even if the other is zero). + if ( m != 1 && n != 1 ) return BLIS_INVALID_DIM_STRIDE_COMBINATION; } else if ( rs == 1 ) diff --git a/frame/base/bli_obj.c b/frame/base/bli_obj.c index 3395ec1b5..d8d152003 100644 --- a/frame/base/bli_obj.c +++ b/frame/base/bli_obj.c @@ -117,31 +117,23 @@ void bli_obj_alloc_buffer( inc_t rs, { dim_t n_elem = 0; dim_t m, n; - inc_t rs_abs, cs_abs, is_abs; siz_t elem_size; siz_t buffer_size; void* p; - m = bli_obj_length( *obj ); - n = bli_obj_width( *obj ); - - // Adjust the strides, if needed, before doing anything else - // (particularly, before doing any error checking). - bli_adjust_strides_alloc( m, n, &rs, &cs, &is ); - - if ( bli_error_checking_is_enabled() ) - bli_obj_alloc_buffer_check( rs, cs, is, obj ); + // Query the dimensions of the object we are allocating. + m = bli_obj_length( *obj ); + n = bli_obj_width( *obj ); // Query the size of one element. elem_size = bli_obj_elem_size( *obj ); - // Compute the magnitude of the row, column, and imaginary strides. - // We will use these in the comparisons below since those - // comparisions really relate only to the magnitudes of the strides, - // not their signs. - rs_abs = bli_abs( rs ); - cs_abs = bli_abs( cs ); - is_abs = bli_abs( is ); + // Adjust the strides, if needed, before doing anything else + // (particularly, before doing any error checking). + bli_adjust_strides( m, n, elem_size, &rs, &cs, &is ); + + if ( bli_error_checking_is_enabled() ) + bli_obj_alloc_buffer_check( rs, cs, is, obj ); // Determine how much object to allocate. if ( m == 0 || n == 0 ) @@ -150,44 +142,13 @@ void bli_obj_alloc_buffer( inc_t rs, // should remain unchanged (because alignment is not needed). n_elem = 0; } - else if ( rs_abs == 1 ) - { - cs = bli_align_dim_to_size( cs, elem_size, - BLIS_HEAP_STRIDE_ALIGN_SIZE ); - n_elem = bli_abs( cs ) * n; - } - else if ( cs_abs == 1 ) - { - rs = bli_align_dim_to_size( rs, elem_size, - BLIS_HEAP_STRIDE_ALIGN_SIZE ); - n_elem = bli_abs( rs ) * m; - } else { - if ( rs_abs < cs_abs ) - { - // Note this case is identical to that of rs == 1 above. - cs = bli_align_dim_to_size( cs, elem_size, - BLIS_HEAP_STRIDE_ALIGN_SIZE ); - n_elem = bli_abs( cs ) * n; - } - else if ( cs_abs < rs_abs ) - { - // Note this case is identical to that of cs == 1 above. - rs = bli_align_dim_to_size( rs, elem_size, - BLIS_HEAP_STRIDE_ALIGN_SIZE ); - n_elem = bli_abs( rs ) * m; - } - else // if ( cs_abs == rs_abs ) - { - // If the row and column strides are equal, it almost certainly - // means that m == n == 1. (If that is not the case, then - // something is very wrong.) - if ( m != 1 || n != 1 ) - bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - - n_elem = 1; - } + // The number of elements to allocate is given by the distance from + // the element with the lowest address (usually {0, 0}) to the element + // with the highest address (usually {m-1, n-1}), plus one for the + // highest element itself. + n_elem = (m-1) * bli_abs( rs ) + (n-1) * bli_abs( cs ) + 1; } // Handle the special case where imaginary stride is larger than @@ -197,10 +158,7 @@ void bli_obj_alloc_buffer( inc_t rs, // Notice that adding is/2 works regardless of whether the // imaginary stride is unit, something between unit and // 2*n_elem, or something bigger than 2*n_elem. - if ( 1 > is_abs ) - { - n_elem = is_abs / 2 + n_elem; - } + n_elem = bli_abs( is ) / 2 + n_elem; } // Compute the size of the total buffer to be allocated, which includes @@ -222,16 +180,9 @@ void bli_obj_attach_buffer( void* p, inc_t is, obj_t* obj ) { - dim_t m, n; - - m = bli_obj_length( *obj ); - n = bli_obj_width( *obj ); - - // Adjust the strides, if necessary. - bli_adjust_strides_attach( m, n, &rs, &cs, &is ); - - // Notice that we wait until after strides have been adjusted to - // error-check. + // Check that the strides and lengths are compatible. Note that the + // user *must* specify valid row and column strides when attaching an + // external buffer. if ( bli_error_checking_is_enabled() ) bli_obj_attach_buffer_check( p, rs, cs, is, obj ); @@ -357,42 +308,20 @@ void bli_obj_create_const_copy_of( obj_t* a, obj_t* b ) *temp_i = ( gint_t ) bli_zreal( value ); } -void bli_adjust_strides_alloc( dim_t m, - dim_t n, - inc_t* rs, - inc_t* cs, - inc_t* is ) +void bli_adjust_strides( dim_t m, + dim_t n, + siz_t elem_size, + inc_t* rs, + inc_t* cs, + inc_t* is ) { // Here, we check the strides that were input from the user and modify // them if needed. // Handle the special "empty" case first. If either dimension is zero, - // we set row and column strides to zero. - if ( m == 0 || n == 0 ) - { - // NOTE: Previously, we forced the row and column strides to be zero - // if the matrices was being created with a zero dimension. It is - // *probably* the case that this is not necessary. However, in case - // a problem turns up later on, I'm leaving these lines commented - // out. - //*rs = 0; - //*cs = 0; - //*is = 1; - - return; - } - - bli_adjust_strides_attach( m, n, rs, cs, is ); -} - -void bli_adjust_strides_attach( dim_t m, - dim_t n, - inc_t* rs, - inc_t* cs, - inc_t* is ) -{ - // Here, we check the strides that were input from the user and modify - // them if needed. + // do nothing (this could represent a zero-length "slice" of another + // matrix). + if ( m == 0 || n == 0 ) return; // Interpret rs = cs = 0 as request for column storage. if ( *rs == 0 && *cs == 0 && ( *is == 0 || *is == 1 ) ) @@ -418,6 +347,21 @@ void bli_adjust_strides_attach( dim_t m, // Use default complex storage. *is = 1; + + // Align the strides depending on the tilt of the matrix. Note that + // scalars are neither row nor column tilted. Also note that alignment + // is only done for rs = cs = 0, and any user-supplied row and column + // strides are preserved. + if ( bli_is_col_tilted( m, n, *rs, *cs ) ) + { + *cs = bli_align_dim_to_size( *cs, elem_size, + BLIS_HEAP_STRIDE_ALIGN_SIZE ); + } + else if ( bli_is_row_tilted( m, n, *rs, *cs ) ) + { + *rs = bli_align_dim_to_size( *rs, elem_size, + BLIS_HEAP_STRIDE_ALIGN_SIZE ); + } } else if ( *rs == 1 && *cs == 1 ) { diff --git a/frame/base/bli_obj.h b/frame/base/bli_obj.h index d9926a52d..92e9b1d87 100644 --- a/frame/base/bli_obj.h +++ b/frame/base/bli_obj.h @@ -78,16 +78,12 @@ void bli_obj_create_const( double value, obj_t* obj ); void bli_obj_create_const_copy_of( obj_t* a, obj_t* b ); -void bli_adjust_strides_alloc( dim_t m, - dim_t n, - inc_t* rs, - inc_t* cs, - inc_t* is ); -void bli_adjust_strides_attach( dim_t m, - dim_t n, - inc_t* rs, - inc_t* cs, - inc_t* is ); +void bli_adjust_strides( dim_t m, + dim_t n, + siz_t elem_size, + inc_t* rs, + inc_t* cs, + inc_t* is ); siz_t bli_datatype_size( num_t dt ); diff --git a/frame/include/bli_macro_defs.h b/frame/include/bli_macro_defs.h index 95c858002..797c5d611 100644 --- a/frame/include/bli_macro_defs.h +++ b/frame/include/bli_macro_defs.h @@ -40,12 +40,16 @@ #ifdef __cplusplus // Language is C++; define restrict as nothing. + #ifndef restrict #define restrict + #endif #elif __STDC_VERSION__ >= 199901L // Language is C99 (or later); do nothing since restrict is recognized. #else // Language is pre-C99; define restrict as nothing. + #ifndef restrict #define restrict + #endif #endif diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index c2e05ccac..6be16bc0c 100644 --- a/frame/include/bli_param_macro_defs.h +++ b/frame/include/bli_param_macro_defs.h @@ -378,13 +378,15 @@ ( bli_abs( rs ) != 1 && \ bli_abs( cs ) != 1 ) -#define bli_is_row_tilted( rs, cs ) \ +#define bli_is_row_tilted( m, n, rs, cs ) \ \ - ( bli_abs( cs ) < bli_abs( rs ) ) + ( bli_abs( cs ) == bli_abs( rs ) ? n < m \ + : bli_abs( cs ) < bli_abs( rs ) ) -#define bli_is_col_tilted( rs, cs ) \ +#define bli_is_col_tilted( m, n, rs, cs ) \ \ - ( bli_abs( rs ) < bli_abs( cs ) ) + ( bli_abs( rs ) == bli_abs( cs ) ? m < n \ + : bli_abs( rs ) < bli_abs( cs ) ) #define bli_has_nonunit_inc1( inc1 ) \ \ @@ -780,7 +782,7 @@ uplo_eff = uploa; \ diagoff_eff = diagoffa_use; \ \ - if ( bli_is_row_tilted( inca, lda ) ) \ + if ( bli_is_row_tilted( n_elem_max, n_iter_max, inca, lda ) ) \ { \ bli_swap_dims( n_iter_max, n_elem_max ); \ bli_swap_incs( inca, lda ); \ @@ -967,8 +969,8 @@ bli_negate_diag_offset( diagoff_eff ); \ } \ \ - if ( bli_is_row_tilted( incb, ldb ) && \ - bli_is_row_tilted( inca, lda ) ) \ + if ( bli_is_row_tilted( n_elem_max, n_iter_max, incb, ldb ) && \ + bli_is_row_tilted( n_elem_max, n_iter_max, inca, lda ) ) \ { \ bli_swap_dims( n_iter_max, n_elem_max ); \ bli_swap_incs( inca, lda ); \