diff --git a/frame/1/packv/bl2_packv_init.c b/frame/1/packv/bl2_packv_init.c index 65f18b7cd..8f2ecf0ff 100644 --- a/frame/1/packv/bl2_packv_init.c +++ b/frame/1/packv/bl2_packv_init.c @@ -126,7 +126,7 @@ void bl2_packv_init_pack( pack_t pack_schema, mem_t* mem_p; dim_t m_p_pad; - siz_t elem_size_p; + siz_t size_p; inc_t rs_p, cs_p; void* buf; @@ -149,16 +149,33 @@ void bl2_packv_init_pack( pack_t pack_schema, mem_p = bl2_obj_pack_mem( *p ); // Compute the dimensions padded by the dimension multiples. - m_p_pad = bl2_align_dim_to_mult( bl2_obj_vector_dim( *p ), mult_m_dim ); - elem_size_p = bl2_obj_elem_size( *p ); + m_p_pad = bl2_align_dim_to_mult( bl2_obj_vector_dim( *p ), mult_m_dim ); - // Check the mem_t entry of p. If it is not yet allocated, then acquire - // a memory block suitable for a vector. If the mem_t object has already - // been allocated a buffer, then update the dimensions embedded in the - // object according to the latest value in m_p_pad. - bl2_mem_alloc_update_v( m_p_pad, - elem_size_p, - mem_p ); + // Compute the size of the packed buffer. + size_p = m_p_pad * 1 * bl2_obj_elem_size( *p ); + + if ( bl2_mem_is_unalloc( mem_p ) ) + { + // If the mem_t object of p has not yet been allocated, then acquire + // a memory block suitable for a vector. + bl2_mem_acquire_v( size_p, + mem_p ); + } + else + { + // If the mem_t object has already been allocated, then release and + // re-acquire the memory so there is sufficient space. + if ( bl2_mem_size( mem_p ) < size_p ) + { + bl2_mem_release( mem_p ); + + bl2_mem_acquire_v( size_p, + mem_p ); + } + } + + // Save the padded (packed) dimensions into the packed object. + bl2_obj_set_packed_dims( m_p_pad, 1, *p ); // Grab the buffer address from the mem_t object and copy it to the // main object buffer field. (Sometimes this buffer address will be @@ -176,7 +193,7 @@ void bl2_packv_init_pack( pack_t pack_schema, // how much space beyond the vector would need to be zero-padded, if // zero-padding was needed. rs_p = 1; - cs_p = bl2_mem_length( mem_p ); + cs_p = bl2_obj_packed_length( *p ); bl2_obj_set_incs( rs_p, cs_p, *p ); } diff --git a/frame/1m/packm/bl2_packm_blk_var2.c b/frame/1m/packm/bl2_packm_blk_var2.c index adb60b57c..6163453e8 100644 --- a/frame/1m/packm/bl2_packm_blk_var2.c +++ b/frame/1m/packm/bl2_packm_blk_var2.c @@ -59,7 +59,6 @@ void bl2_packm_blk_var2( obj_t* beta, obj_t* p ) { num_t dt_cp = bl2_obj_datatype( *c ); - mem_t* mem_p = bl2_obj_pack_mem( *p ); struc_t strucc = bl2_obj_struc( *c ); doff_t diagoffc = bl2_obj_diag_offset( *c ); @@ -69,8 +68,8 @@ void bl2_packm_blk_var2( obj_t* beta, dim_t m_p = bl2_obj_length( *p ); dim_t n_p = bl2_obj_width( *p ); - dim_t m_max_p = bl2_mem_length( mem_p ); - dim_t n_max_p = bl2_mem_width( mem_p ); + dim_t m_max_p = bl2_obj_packed_length( *p ); + dim_t n_max_p = bl2_obj_packed_width( *p ); void* buf_c = bl2_obj_buffer_at_off( *c ); inc_t rs_c = bl2_obj_row_stride( *c ); diff --git a/frame/1m/packm/bl2_packm_blk_var3.c b/frame/1m/packm/bl2_packm_blk_var3.c index b16f7beb5..147fd6c3f 100644 --- a/frame/1m/packm/bl2_packm_blk_var3.c +++ b/frame/1m/packm/bl2_packm_blk_var3.c @@ -62,7 +62,6 @@ void bl2_packm_blk_var3( obj_t* beta, obj_t* p ) { num_t dt_cp = bl2_obj_datatype( *c ); - mem_t* mem_p = bl2_obj_pack_mem( *p ); struc_t strucc = bl2_obj_struc( *c ); doff_t diagoffc = bl2_obj_diag_offset( *c ); @@ -75,8 +74,8 @@ void bl2_packm_blk_var3( obj_t* beta, dim_t m_p = bl2_obj_length( *p ); dim_t n_p = bl2_obj_width( *p ); - dim_t m_max_p = bl2_mem_length( mem_p ); - dim_t n_max_p = bl2_mem_width( mem_p ); + dim_t m_max_p = bl2_obj_packed_length( *p ); + dim_t n_max_p = bl2_obj_packed_width( *p ); void* buf_c = bl2_obj_buffer_at_off( *c ); inc_t rs_c = bl2_obj_row_stride( *c ); diff --git a/frame/1m/packm/bl2_packm_init.c b/frame/1m/packm/bl2_packm_init.c index 0a1be45e8..b5f8f4115 100644 --- a/frame/1m/packm/bl2_packm_init.c +++ b/frame/1m/packm/bl2_packm_init.c @@ -174,7 +174,7 @@ void bl2_packm_init_pack( bool_t densify, mem_t* mem_p; dim_t m_p_pad, n_p_pad; - siz_t elem_size_p; + siz_t size_p; inc_t rs_p, cs_p; void* buf; @@ -220,24 +220,40 @@ void bl2_packm_init_pack( bool_t densify, mem_p = bl2_obj_pack_mem( *p ); // Compute the dimensions padded by the dimension multiples. These - // dimensions are those that the macro- and micro-kernels will use. + // dimensions represent the dimensions of the packed matrices, including + // zero-padding, and will be used by the macro- and micro-kernels. // We compute them by starting with the effective dimensions of c (now // in p) and aligning them to the dimension multiples (typically equal // to register blocksizes). This does waste a little bit of space for // level-2 operations, but that's okay with us. - m_p_pad = bl2_align_dim_to_mult( bl2_obj_length( *p ), mult_m_dim ); - n_p_pad = bl2_align_dim_to_mult( bl2_obj_width( *p ), mult_n_dim ); - elem_size_p = bl2_obj_elem_size( *p ); + m_p_pad = bl2_align_dim_to_mult( bl2_obj_length( *p ), mult_m_dim ); + n_p_pad = bl2_align_dim_to_mult( bl2_obj_width( *p ), mult_n_dim ); - // Check the mem_t entry of p. If it is not yet allocated, then acquire - // a memory block of type pack_buf_type. If the mem_t object has already - // been allocated a buffer, then update the dimensions embedded in the - // object according to the latest values in m_p_pad and n_p_pad. - bl2_mem_alloc_update_m( m_p_pad, - n_p_pad, - elem_size_p, - pack_buf_type, - mem_p ); + // Compute the size of the packed buffer. + size_p = m_p_pad * n_p_pad * bl2_obj_elem_size( *p ); + + if ( bl2_mem_is_unalloc( mem_p ) ) + { + // If the mem_t object of p has not yet been allocated, then acquire + // a memory block of type pack_buf_type. + bl2_mem_acquire_m( size_p, + pack_buf_type, + mem_p ); + } + else + { + // If the mem_t object is currently allocated and smaller than is + // needed, then something is very wrong, since the cache blocksizes + // that drive the level-3 blocked algorithms are the same ones that + // determine the sizes of the blocks within our memory allocator's + // memory pools. This branch should never be executed. + if ( bl2_mem_size( mem_p ) < size_p ) bl2_abort(); + } + + // Save the padded (packed) dimensions into the packed object. It is + // important to save these dimensions since they represent the actual + // dimensions of the zero-padded matrix. + bl2_obj_set_packed_dims( m_p_pad, n_p_pad, *p ); // Grab the buffer address from the mem_t object and copy it to the // main object buffer field. (Sometimes this buffer address will be @@ -250,24 +266,24 @@ void bl2_packm_init_pack( bool_t densify, // Set the row and column strides of p based on the pack schema. if ( pack_schema == BLIS_PACKED_ROWS ) { - // For regular row storage, the padded n dimension used when - // acquiring the pack memory should be used for our row stride, - // with the column stride set to one. By using the WIDTH of the mem_t - // region, we allow for zero-padding (if necessary/desired) along - // the right edge of the matrix. - rs_p = bl2_mem_width( mem_p ); + // For regular row storage, the packed width of our mem_t region + // should be used for the row stride, with the column stride set + // to one. By using the WIDTH of the mem_t region, we allow for + // zero-padding (if necessary/desired) along the right edge of + // the matrix. + rs_p = bl2_obj_packed_width( *p ); cs_p = 1; bl2_obj_set_incs( rs_p, cs_p, *p ); } else if ( pack_schema == BLIS_PACKED_COLUMNS ) { - // For regular column storage, the padded m dimension used when - // acquiring the pack memory should be used for our column stride, - // with the row stride set to one. By using the LENGTH of the mem_t - // region, we allow for zero-padding (if necessary/desired) along - // the bottom edge of the matrix. - cs_p = bl2_mem_length( mem_p ); + // For regular column storage, the packed length of our mem_t region + // should be used for the column stride, with the row stride set + // to one. By using the LENGTH of the mem_t region, we allow for + // zero-padding (if necessary/desired) along the bottom edge of + // the matrix. + cs_p = bl2_obj_packed_length( *p ); rs_p = 1; bl2_obj_set_incs( rs_p, cs_p, *p ); @@ -292,11 +308,11 @@ void bl2_packm_init_pack( bool_t densify, // The "panel stride" of a panel packed object is interpreted as the // distance between the (0,0) element of panel k and the (0,0) - // element of panel k+1. We use the WIDTH of the mem_t region to - // determine the panel "width"; this will allow for zero-padding + // element of panel k+1. We use the WIDTH of the packed mem_t region + // to determine the panel "width"; this will allow for zero-padding // (if necessary/desired) along the far end of each panel (ie: the // right edge of the matrix). - ps_p = cs_p * bl2_mem_width( mem_p ); + ps_p = cs_p * bl2_obj_packed_width( *p ); // Store the strides in p. bl2_obj_set_incs( rs_p, cs_p, *p ); @@ -322,11 +338,11 @@ void bl2_packm_init_pack( bool_t densify, // The "panel stride" of a panel packed object is interpreted as the // distance between the (0,0) element of panel k and the (0,0) - // element of panel k+1. We use the LENGTH of the mem_t region to - // determine the panel "length"; this will allow for zero-padding + // element of panel k+1. We use the LENGTH of the packed mem_t region + // to determine the panel "length"; this will allow for zero-padding // (if necessary/desired) along the far end of each panel (ie: the // bottom edge of the matrix). - ps_p = bl2_mem_length( mem_p ) * rs_p; + ps_p = bl2_obj_packed_length( *p ) * rs_p; // Store the strides in p. bl2_obj_set_incs( rs_p, cs_p, *p ); diff --git a/frame/1m/packm/bl2_packm_part.c b/frame/1m/packm/bl2_packm_part.c index 103485fad..96930a44a 100644 --- a/frame/1m/packm/bl2_packm_part.c +++ b/frame/1m/packm/bl2_packm_part.c @@ -74,22 +74,23 @@ void bl2_packm_acquire_mpart_t2b( subpart_t requested_part, // Modify offsets and dimensions of requested partition. bl2_obj_set_dims( b, n, *sub_obj ); - // Tweak the width of the pack_mem region of the subpartition to trick - // the underlying implementation into only zero-padding for the narrow - // submatrix of interest. Usually, the value we want is b (for non-edge - // cases), but at the edges, we want the remainder of the mem_t region - // in the m dimension. Edge cases are defined as occurring when i + b is - // exactly equal to the length of the parent object. In these cases, we - // arrive at the new pack_mem region width by simply subtracting off i. + // Tweak the packed length of the subpartition to trick the underlying + // implementation into only zero-padding for the narrow submatrix of + // interest. Usually, the value we want is b (for non-edge cases), but + // at the edges, we want the remainder of the mem_t region in the m + // dimension. Edge cases are defined as occurring when i + b is exactly + // equal to the inherited sub-object's length (which happens since the + // determine_blocksize function would have returned a smaller value of + // b for the edge iteration). In these cases, we arrive at the new + // packed length by simply subtracting off i. { - mem_t* pack_mem = bl2_obj_pack_mem( *sub_obj ); - dim_t m_max = bl2_mem_length( pack_mem ); - dim_t m_mem; + dim_t m_pack_max = bl2_obj_packed_length( *sub_obj ); + dim_t m_pack_cur; - if ( i + b == m ) m_mem = m_max - i; - else m_mem = b; + if ( i + b == m ) m_pack_cur = m_pack_max - i; + else m_pack_cur = b; - bl2_mem_set_length( m_mem, pack_mem ); + bl2_obj_set_packed_length( m_pack_cur, *sub_obj ); } // Translate the desired offsets to a panel offset and adjust the @@ -97,13 +98,16 @@ void bl2_packm_acquire_mpart_t2b( subpart_t requested_part, { char* buf_p = bl2_obj_buffer( *sub_obj ); siz_t elem_size = bl2_obj_elem_size( *sub_obj ); - inc_t cs_p = bl2_obj_col_stride( *sub_obj ); - dim_t off_to_elem = i * cs_p; + dim_t off_to_panel = bl2_packm_offset_to_panel_for( i, sub_obj ); - buf_p = buf_p + elem_size * off_to_elem; + buf_p = buf_p + elem_size * off_to_panel; bl2_obj_set_buffer( ( void* )buf_p, *sub_obj ); } + + // Don't have any code that utilizes this function yet. This abort is + // here to force someone to make sure the above works! + bl2_abort(); } @@ -148,40 +152,23 @@ void bl2_packm_acquire_mpart_l2r( subpart_t requested_part, // Modify offsets and dimensions of requested partition. bl2_obj_set_dims( m, b, *sub_obj ); -/* DON'T NEED THIS NOW THAT COPYING IS DONE IN _INIT_SUBPART_FROM(). - // Copy the pack_mem and cast_mem entries. + // Tweak the packed width of the subpartition to trick the underlying + // implementation into only zero-padding for the narrow submatrix of + // interest. Usually, the value we want is b (for non-edge cases), but + // at the edges, we want the remainder of the mem_t region in the n + // dimension. Edge cases are defined as occurring when j + b is exactly + // equal to the inherited sub-object's width (which happens since the + // determine_blocksize function would have returned a smaller value of + // b for the edge iteration). In these cases, we arrive at the new + // packed width by simply subtracting off j. { - mem_t* pack_mem = bl2_obj_pack_mem( *obj ); - mem_t* cast_mem = bl2_obj_cast_mem( *obj ); + dim_t n_pack_max = bl2_obj_packed_width( *sub_obj ); + dim_t n_pack_cur; - bl2_obj_set_pack_mem( pack_mem, *sub_obj ); - bl2_obj_set_cast_mem( cast_mem, *sub_obj ); - } + if ( j + b == n ) n_pack_cur = n_pack_max - j; + else n_pack_cur = b; - // Copy the panel stride from the original object. - { - inc_t ps = bl2_obj_panel_stride( *obj ); - - bl2_obj_set_panel_stride( ps, *sub_obj ); - } -*/ - - // Tweak the width of the pack_mem region of the subpartition to trick - // the underlying implementation into only zero-padding for the narrow - // submatrix of interest. Usually, the value we want is b (for non-edge - // cases), but at the edges, we want the remainder of the mem_t region - // in the n dimension. Edge cases are defined as occurring when j + b is - // exactly equal to the width of the parent object. In these cases, we - // arrive at the new pack_mem region width by simply subtracting off j. - { - mem_t* pack_mem = bl2_obj_pack_mem( *sub_obj ); - dim_t n_max = bl2_mem_width( pack_mem ); - dim_t n_mem; - - if ( j + b == n ) n_mem = n_max - j; - else n_mem = b; - - bl2_mem_set_width( n_mem, pack_mem ); + bl2_obj_set_packed_width( n_pack_cur, *sub_obj ); } // Translate the desired offsets to a panel offset and adjust the diff --git a/frame/1m/packm/bl2_packm_unb_var1.c b/frame/1m/packm/bl2_packm_unb_var1.c index 06fcd580b..dad01f212 100644 --- a/frame/1m/packm/bl2_packm_unb_var1.c +++ b/frame/1m/packm/bl2_packm_unb_var1.c @@ -60,7 +60,6 @@ void bl2_packm_unb_var1( obj_t* beta, obj_t* p ) { num_t dt_cp = bl2_obj_datatype( *c ); - mem_t* mem_p = bl2_obj_pack_mem( *p ); struc_t strucc = bl2_obj_struc( *c ); doff_t diagoffc = bl2_obj_diag_offset( *c ); @@ -71,8 +70,8 @@ void bl2_packm_unb_var1( obj_t* beta, dim_t m_p = bl2_obj_length( *p ); dim_t n_p = bl2_obj_width( *p ); - dim_t m_max_p = bl2_mem_length( mem_p ); - dim_t n_max_p = bl2_mem_width( mem_p ); + dim_t m_max_p = bl2_obj_packed_length( *p ); + dim_t n_max_p = bl2_obj_packed_width( *p ); void* buf_c = bl2_obj_buffer_at_off( *c ); inc_t rs_c = bl2_obj_row_stride( *c ); diff --git a/frame/1m/packm/old/bl2_packm_blk_var1.c b/frame/1m/packm/old/bl2_packm_blk_var1.c index 3aa6f0e80..eeed7553a 100644 --- a/frame/1m/packm/old/bl2_packm_blk_var1.c +++ b/frame/1m/packm/old/bl2_packm_blk_var1.c @@ -61,7 +61,6 @@ void bl2_packm_blk_var1( obj_t* beta, packm_t* cntl ) { num_t dt_cp = bl2_obj_datatype( *c ); - mem_t* mem_p = bl2_obj_pack_mem( *p ); struc_t strucc = bl2_obj_struc( *c ); doff_t diagoffc = bl2_obj_diag_offset( *c ); @@ -72,8 +71,8 @@ void bl2_packm_blk_var1( obj_t* beta, dim_t m_p = bl2_obj_length( *p ); dim_t n_p = bl2_obj_width( *p ); - dim_t m_max_p = bl2_mem_length( mem_p ); - dim_t n_max_p = bl2_mem_width( mem_p ); + dim_t m_max_p = bl2_obj_packed_length( *p ); + dim_t n_max_p = bl2_obj_packed_width( *p ); void* buf_c = bl2_obj_buffer_at_off( *c ); inc_t rs_c = bl2_obj_row_stride( *c ); diff --git a/frame/base/bl2_mem.c b/frame/base/bl2_mem.c index a07ab8d6f..00a31d9f8 100644 --- a/frame/base/bl2_mem.c +++ b/frame/base/bl2_mem.c @@ -101,13 +101,10 @@ static char pool_mn_mem[ BLIS_MN_POOL_SIZE ]; -void bl2_mem_acquire_m( dim_t m_req, - dim_t n_req, - siz_t elem_size, +void bl2_mem_acquire_m( siz_t req_size, packbuf_t buf_type, mem_t* mem ) { - siz_t req_size; siz_t block_size; dim_t pool_index; pool_t* pool; @@ -116,9 +113,6 @@ void bl2_mem_acquire_m( dim_t m_req, int i; - // Compute the size of the requested contiguous memory region. - req_size = m_req * n_req * elem_size; - if ( buf_type == BLIS_BUFFER_FOR_GEN_USE ) { // For general-use buffer requests, such as those used by level-2 @@ -128,17 +122,13 @@ void bl2_mem_acquire_m( dim_t m_req, // Initialize the mem_t object with: // - the address of the memory block, - // - the buffer type (a packbuf_t value), - // - the size of the requested region, and - // - the requested dimensions, which are presumably already aligned to - // dimension multiples (typically equal to register blocksizes). + // - the buffer type (a packbuf_t value), and + // - the size of the requested region. // NOTE: We do not initialize the pool field since this block did not // come from a contiguous memory pool. bl2_mem_set_buffer( block, mem ); bl2_mem_set_buf_type( buf_type, mem ); bl2_mem_set_size( req_size, mem ); - bl2_mem_set_dims( m_req, n_req, mem ); - bl2_mem_set_elem_size( elem_size, mem ); } else { @@ -197,17 +187,13 @@ void bl2_mem_acquire_m( dim_t m_req, // Initialize the mem_t object with: // - the address of the memory block, // - the buffer type (a packbuf_t value), - // - the address of the memory pool to which it belongs, + // - the address of the memory pool to which it belongs, and // - the size of the contiguous memory block (NOT the size of the - // requested region), and - // - the requested dimensions, which are presumably already aligned to - // dimension multiples (typically equal to register blocksizes). + // requested region). bl2_mem_set_buffer( block, mem ); bl2_mem_set_buf_type( buf_type, mem ); bl2_mem_set_pool( pool, mem ); bl2_mem_set_size( block_size, mem ); - bl2_mem_set_dims( m_req, n_req, mem ); - bl2_mem_set_elem_size( elem_size, mem ); } } @@ -264,26 +250,20 @@ void bl2_mem_release( mem_t* mem ) // Clear the mem_t object so that it appears unallocated. We clear: // - the buffer field, - // - the pool field, - // - the size field, and - // - the dimension fields. + // - the pool field, and + // - the size field. // NOTE: We do not clear the buf_type field since there is no // "uninitialized" value for packbuf_t. bl2_mem_set_buffer( NULL, mem ); bl2_mem_set_pool( NULL, mem ); bl2_mem_set_size( 0, mem ); - bl2_mem_set_dims( 0, 0, mem ); - bl2_mem_set_elem_size( 0, mem ); } -void bl2_mem_acquire_v( dim_t m_req, - siz_t elem_size, - mem_t* mem ) +void bl2_mem_acquire_v( siz_t req_size, + mem_t* mem ) { - bl2_mem_acquire_m( m_req, - 1, - elem_size, + bl2_mem_acquire_m( req_size, BLIS_BUFFER_FOR_GEN_USE, mem ); } diff --git a/frame/base/bl2_mem.h b/frame/base/bl2_mem.h index 2b8b22700..177df2e04 100644 --- a/frame/base/bl2_mem.h +++ b/frame/base/bl2_mem.h @@ -32,14 +32,11 @@ */ -void bl2_mem_acquire_m( dim_t m_req, - dim_t n_req, - siz_t elem_size, +void bl2_mem_acquire_m( siz_t req_size, packbuf_t buf_type, mem_t* mem ); -void bl2_mem_acquire_v( dim_t m_req, - siz_t elem_size, +void bl2_mem_acquire_v( siz_t req_size, mem_t* mem ); void bl2_mem_release( mem_t* mem ); diff --git a/frame/base/bl2_obj.c b/frame/base/bl2_obj.c index ac13a95fc..0ffa29d3c 100644 --- a/frame/base/bl2_obj.c +++ b/frame/base/bl2_obj.c @@ -478,16 +478,12 @@ void bl2_obj_print( char* label, obj_t* obj ) fprintf( file, " elem size %lu\n", bl2_obj_elem_size( *obj ) ); fprintf( file, " rs, cs %lu, %lu\n", bl2_obj_row_stride( *obj ), bl2_obj_col_stride( *obj ) ); - //fprintf( file, " cast_mem \n" ); - //fprintf( file, " - buf %p\n", bl2_mem_buffer( cast_mem ) ); - //fprintf( file, " - m %lu\n", bl2_mem_length( cast_mem ) ); - //fprintf( file, " - n %lu\n", bl2_mem_width( cast_mem ) ); fprintf( file, " pack_mem \n" ); fprintf( file, " - buf %p\n", bl2_mem_buffer( pack_mem ) ); fprintf( file, " - buf_type %u\n", bl2_mem_buf_type( pack_mem ) ); fprintf( file, " - size %lu\n", bl2_mem_size( pack_mem ) ); - fprintf( file, " - m used %lu\n", bl2_mem_length( pack_mem ) ); - fprintf( file, " - n used %lu\n", bl2_mem_width( pack_mem ) ); + fprintf( file, " m_packed %lu\n", bl2_obj_packed_length( *obj ) ); + fprintf( file, " n_packed %lu\n", bl2_obj_packed_width( *obj ) ); fprintf( file, " ps %lu\n", bl2_obj_panel_stride( *obj ) ); fprintf( file, "\n" ); diff --git a/frame/include/bl2_mem_macro_defs.h b/frame/include/bl2_mem_macro_defs.h index 3d756fff9..4e7ccd191 100644 --- a/frame/include/bl2_mem_macro_defs.h +++ b/frame/include/bl2_mem_macro_defs.h @@ -54,18 +54,6 @@ \ ( (mem_p)->size ) -#define bl2_mem_length( mem_p ) \ -\ - ( (mem_p)->m ) - -#define bl2_mem_width( mem_p ) \ -\ - ( (mem_p)->n ) - -#define bl2_mem_elem_size( mem_p ) \ -\ - ( (mem_p)->elem_size ) - #define bl2_mem_is_alloc( mem_p ) \ \ ( bl2_mem_buffer( mem_p ) != NULL ) @@ -97,134 +85,5 @@ mem_p->size = size0; \ } -#define bl2_mem_set_length( m0, mem_p ) \ -{ \ - mem_p->m = m0; \ -} - -#define bl2_mem_set_width( n0, mem_p ) \ -{ \ - mem_p->n = n0; \ -} - -#define bl2_mem_set_elem_size( elem_size0, mem_p ) \ -{ \ - mem_p->elem_size = elem_size0; \ -} - -#define bl2_mem_set_dims( m0, n0, mem_p ) \ -{ \ - bl2_mem_set_length( m0, mem_p ); \ - bl2_mem_set_width( n0, mem_p ); \ -} - - -// Allocate a mem_t object if it is unallocated, or update its dimensions -// if it is allocated. This macro is used for matrices. - -#define bl2_mem_alloc_update_m( m_padded, n_padded, elem_size, buf_type, mem_p ) \ -{ \ - bool_t needs_alloc; \ - siz_t size_needed; \ -\ - if ( bl2_mem_is_unalloc( mem_p ) ) \ - { \ - /* If the mem_t object is currently unallocated (NULL), mark it for - allocation. */ \ - needs_alloc = TRUE; \ - } \ - else \ - { \ - /* Compute the total buffer size needed. */ \ - size_needed = m_padded * n_padded * elem_size; \ -\ - if ( size_needed <= bl2_mem_size( mem_p ) ) \ - { \ - /* If the mem_t object is currently allocated, AND what is - allocated and available is equal to or greater than what is - needed, then set the dimensions according to how much we - need. This allows us to avoid unnecessarily releasing and - re-allocating when all we need is a subset of what is already - available. This case will occur when, for example, handling - both forward and backward edge cases. */ \ - bl2_mem_set_dims( m_padded, n_padded, mem_p ); \ -\ - needs_alloc = FALSE; \ - } \ - else /* if ( bl2_mem_size( mem_p ) < size_needed ) */ \ - { \ - /* If the mem_t object is currently allocated and smaller than is - needed, then something is very wrong, since the cache blocksizes - that drive the level-3 blocked algorithms are the same ones that - determine the sizes of the blocks within our memory allocator's - memory pools. This branch should never be executed. */ \ - bl2_abort(); \ -\ - needs_alloc = FALSE; \ - } \ - } \ -\ - if ( needs_alloc ) \ - { \ - bl2_mem_acquire_m( m_padded, \ - n_padded, \ - elem_size, \ - buf_type, \ - mem_p ); \ - } \ -} \ - - -// Allocate a mem_t object if it is unallocated, or update its dimensions -// if it is allocated. This macro is used for vectors. - -#define bl2_mem_alloc_update_v( m_padded, elem_size, mem_p ) \ -{ \ - bool_t needs_alloc; \ - siz_t size_needed; \ -\ - if ( bl2_mem_is_unalloc( mem_p ) ) \ - { \ - /* If the mem_t object is currently unallocated (NULL), mark it for - allocation. */ \ - needs_alloc = TRUE; \ - } \ - else \ - { \ - /* Compute the total buffer size needed. */ \ - size_needed = m_padded * elem_size; \ -\ - if ( size_needed <= bl2_mem_size( mem_p ) ) \ - { \ - /* If the mem_t object is currently allocated, AND what is - allocated and available is equal to or larger than what is - needed, then set the dimension according to how much we - need. This allows us to avoid unnecessarily releasing and - re-allocating when all we need is a subset of what is already - available. This case will occur when, for example, handling - both forward and backward edge cases. */ \ - bl2_mem_set_dims( m_padded, 1, mem_p ); \ -\ - needs_alloc = FALSE; \ - } \ - else /* if ( bl2_mem_size( mem_p ) < size_needed ) */ \ - { \ - /* If the mem_t object is currently allocated and smaller than is - needed, then release the memory and re-allocate. */ \ - bl2_mem_release( mem_p ); \ -\ - needs_alloc = TRUE; \ - } \ - } \ -\ - if ( needs_alloc ) \ - { \ - bl2_mem_acquire_v( m_padded, \ - elem_size, \ - mem_p ); \ - } \ -} \ - - #endif diff --git a/frame/include/bl2_obj_macro_defs.h b/frame/include/bl2_obj_macro_defs.h index c4ef096fd..cc6a209d3 100644 --- a/frame/include/bl2_obj_macro_defs.h +++ b/frame/include/bl2_obj_macro_defs.h @@ -570,21 +570,6 @@ bl2_obj_width_stored( obj ) } -// Panel stride query - -#define bl2_obj_panel_stride( obj ) \ -\ - ((obj).ps) - - -// Panel stride modification - -#define bl2_obj_set_panel_stride( panel_stride, obj ) \ -{ \ - (obj).ps = panel_stride; \ -} - - // Offset query #define bl2_obj_row_offset( obj ) \ @@ -709,13 +694,13 @@ bl2_obj_width_stored( obj ) } -// Pack mem entry query +// Pack mem_t entry query #define bl2_obj_pack_mem( obj ) \ \ ( &((obj).pack_mem) ) -// Pack mem entry modification +// Pack mem_t entry modification #define bl2_obj_set_pack_mem( mem_p, obj ) \ { \ @@ -723,6 +708,50 @@ bl2_obj_width_stored( obj ) } +// Packed dimensions query + +#define bl2_obj_packed_length( obj ) \ +\ + ( (obj).m_packed ) + +#define bl2_obj_packed_width( obj ) \ +\ + ( (obj).n_packed ) + +// Packed dimensions modification + +#define bl2_obj_set_packed_length( m0, obj ) \ +{ \ + (obj).m_packed = m0; \ +} + +#define bl2_obj_set_packed_width( n0, obj ) \ +{ \ + (obj).n_packed = n0; \ +} + +#define bl2_obj_set_packed_dims( m0, n0, obj ) \ +{ \ + bl2_obj_set_packed_length( m0, obj ); \ + bl2_obj_set_packed_width( n0, obj ); \ +} + + +// Packed panel stride query + +#define bl2_obj_panel_stride( obj ) \ +\ + ((obj).ps) + +// Packed panel stride modification + +#define bl2_obj_set_panel_stride( panel_stride, obj ) \ +{ \ + (obj).ps = panel_stride; \ +} + + +/* // Cast mem entry query #define bl2_obj_cast_mem( obj ) \ @@ -735,48 +764,6 @@ bl2_obj_width_stored( obj ) { \ (obj).cast_mem = *mem_p; \ } - -/* -// Mem entry query - -#define bl2_mem_buffer( mem_p ) \ -\ - ( (mem_p)->buf ) - -#define bl2_mem_elem_size( mem_p ) \ -\ - ( (mem_p)->elem_size ) - -#define bl2_mem_length( mem_p ) \ -\ - ( (mem_p)->m ) - -#define bl2_mem_width( mem_p ) \ -\ - ( (mem_p)->n ) - - -// Mem entry modification - -#define bl2_mem_set_buffer( buf0, mem_p ) \ -{ \ - mem_p->buf = buf0; \ -} - -#define bl2_mem_set_elem_size( elem_size0, mem_p ) \ -{ \ - mem_p->elem_size = elem_size0; \ -} - -#define bl2_mem_set_length( m0, mem_p ) \ -{ \ - mem_p->m = m0; \ -} - -#define bl2_mem_set_width( n0, mem_p ) \ -{ \ - mem_p->n = n0; \ -} */ diff --git a/frame/include/bl2_type_defs.h b/frame/include/bl2_type_defs.h index cd074a5a9..edfb4348f 100644 --- a/frame/include/bl2_type_defs.h +++ b/frame/include/bl2_type_defs.h @@ -275,9 +275,6 @@ typedef struct mem_s packbuf_t buf_type; pool_t* pool; siz_t size; - dim_t m; - dim_t n; - siz_t elem_size; } mem_t; // Blocksize object type @@ -376,9 +373,12 @@ typedef struct obj_s // Pack-related fields mem_t pack_mem; // cached memory region for packing - //mem_t cast_mem; // cached memory region for casting + dim_t m_packed; + dim_t n_packed; inc_t ps; // panel stride (distance to next panel) + //mem_t cast_mem; // cached memory region for casting + } obj_t; @@ -402,10 +402,10 @@ typedef struct obj_s (b).cs = (a).cs; \ \ /* We must NOT copy pack_mem field since this macro forms the basis of - bl2_obj_alias_to(), which is used in packm. There, we want to copy - over the basic fields of the obj_t but PRESERVE the pack_mem field - of the destination object since it holds the cached mem_t buffer - (and dimensions). */ \ + bl2_obj_alias_to(), which is used in packm_init(). There, we want to + copy the basic fields of the obj_t but PRESERVE the pack_mem field + (and the corresponding dimensions and stride) of the destination + object since it holds the cached mem_t object and buffer. */ \ } #define bl2_obj_init_subpart_from( a, b ) \ @@ -428,10 +428,13 @@ typedef struct obj_s /* We want to copy the pack_mem field here because this macro is used when creating subpartitions, including those of packed objects. In those situations, we want the subpartition to inherit the pack_mem - field of its parent. */ \ + field, and the corresponding packed dimensions, of its parent. */ \ (b).pack_mem = (a).pack_mem; \ - /*(b).cast_mem = (a).cast_mem;*/ \ + (b).m_packed = (a).m_packed; \ + (b).n_packed = (a).n_packed; \ (b).ps = (a).ps; \ +\ + /*(b).cast_mem = (a).cast_mem;*/ \ }