Relocated packed mem_t dimension fields to obj_t.

Details:
- Removed the m and n (and elem_size) fields from the mem_t object, and added
  m_packed and n_packed fields to obj_t. These new fields track the same
  values as the old ones. From an abstraction standpoint, it seemed awkward to store
  those dimensions inside the mem_t.
- Updated interfaces to bl2_mem_acquire_*() so that only a byte size argument
  is passed in, instead of m, n, and elem_size.
- Updated bl2_packm_init_pack() and bl2_packv_init_pack() to inline the
  functionality of bl2_mem_alloc_update_m() and bl2_mem_alloc_update_v(),
  respectively.
- Updated packm variants to access the packed length and width fields from
  their new locations.
This commit is contained in:
Field G. Van Zee
2013-03-18 15:37:20 -05:00
parent 36c782857b
commit 1f82b51d06
13 changed files with 191 additions and 353 deletions

View File

@@ -126,7 +126,7 @@ void bl2_packv_init_pack( pack_t pack_schema,
mem_t* mem_p;
dim_t m_p_pad;
siz_t elem_size_p;
siz_t size_p;
inc_t rs_p, cs_p;
void* buf;
@@ -149,16 +149,33 @@ void bl2_packv_init_pack( pack_t pack_schema,
mem_p = bl2_obj_pack_mem( *p );
// Compute the dimensions padded by the dimension multiples.
m_p_pad = bl2_align_dim_to_mult( bl2_obj_vector_dim( *p ), mult_m_dim );
elem_size_p = bl2_obj_elem_size( *p );
m_p_pad = bl2_align_dim_to_mult( bl2_obj_vector_dim( *p ), mult_m_dim );
// Check the mem_t entry of p. If it is not yet allocated, then acquire
// a memory block suitable for a vector. If the mem_t object has already
// been allocated a buffer, then update the dimensions embedded in the
// object according to the latest value in m_p_pad.
bl2_mem_alloc_update_v( m_p_pad,
elem_size_p,
mem_p );
// Compute the size of the packed buffer.
size_p = m_p_pad * 1 * bl2_obj_elem_size( *p );
if ( bl2_mem_is_unalloc( mem_p ) )
{
// If the mem_t object of p has not yet been allocated, then acquire
// a memory block suitable for a vector.
bl2_mem_acquire_v( size_p,
mem_p );
}
else
{
// If the mem_t object has already been allocated, then release and
// re-acquire the memory so there is sufficient space.
if ( bl2_mem_size( mem_p ) < size_p )
{
bl2_mem_release( mem_p );
bl2_mem_acquire_v( size_p,
mem_p );
}
}
// Save the padded (packed) dimensions into the packed object.
bl2_obj_set_packed_dims( m_p_pad, 1, *p );
// Grab the buffer address from the mem_t object and copy it to the
// main object buffer field. (Sometimes this buffer address will be
@@ -176,7 +193,7 @@ void bl2_packv_init_pack( pack_t pack_schema,
// how much space beyond the vector would need to be zero-padded, if
// zero-padding was needed.
rs_p = 1;
cs_p = bl2_mem_length( mem_p );
cs_p = bl2_obj_packed_length( *p );
bl2_obj_set_incs( rs_p, cs_p, *p );
}

View File

@@ -59,7 +59,6 @@ void bl2_packm_blk_var2( obj_t* beta,
obj_t* p )
{
num_t dt_cp = bl2_obj_datatype( *c );
mem_t* mem_p = bl2_obj_pack_mem( *p );
struc_t strucc = bl2_obj_struc( *c );
doff_t diagoffc = bl2_obj_diag_offset( *c );
@@ -69,8 +68,8 @@ void bl2_packm_blk_var2( obj_t* beta,
dim_t m_p = bl2_obj_length( *p );
dim_t n_p = bl2_obj_width( *p );
dim_t m_max_p = bl2_mem_length( mem_p );
dim_t n_max_p = bl2_mem_width( mem_p );
dim_t m_max_p = bl2_obj_packed_length( *p );
dim_t n_max_p = bl2_obj_packed_width( *p );
void* buf_c = bl2_obj_buffer_at_off( *c );
inc_t rs_c = bl2_obj_row_stride( *c );

View File

@@ -62,7 +62,6 @@ void bl2_packm_blk_var3( obj_t* beta,
obj_t* p )
{
num_t dt_cp = bl2_obj_datatype( *c );
mem_t* mem_p = bl2_obj_pack_mem( *p );
struc_t strucc = bl2_obj_struc( *c );
doff_t diagoffc = bl2_obj_diag_offset( *c );
@@ -75,8 +74,8 @@ void bl2_packm_blk_var3( obj_t* beta,
dim_t m_p = bl2_obj_length( *p );
dim_t n_p = bl2_obj_width( *p );
dim_t m_max_p = bl2_mem_length( mem_p );
dim_t n_max_p = bl2_mem_width( mem_p );
dim_t m_max_p = bl2_obj_packed_length( *p );
dim_t n_max_p = bl2_obj_packed_width( *p );
void* buf_c = bl2_obj_buffer_at_off( *c );
inc_t rs_c = bl2_obj_row_stride( *c );

View File

@@ -174,7 +174,7 @@ void bl2_packm_init_pack( bool_t densify,
mem_t* mem_p;
dim_t m_p_pad, n_p_pad;
siz_t elem_size_p;
siz_t size_p;
inc_t rs_p, cs_p;
void* buf;
@@ -220,24 +220,40 @@ void bl2_packm_init_pack( bool_t densify,
mem_p = bl2_obj_pack_mem( *p );
// Compute the dimensions padded by the dimension multiples. These
// dimensions are those that the macro- and micro-kernels will use.
// dimensions represent the dimensions of the packed matrices, including
// zero-padding, and will be used by the macro- and micro-kernels.
// We compute them by starting with the effective dimensions of c (now
// in p) and aligning them to the dimension multiples (typically equal
// to register blocksizes). This does waste a little bit of space for
// level-2 operations, but that's okay with us.
m_p_pad = bl2_align_dim_to_mult( bl2_obj_length( *p ), mult_m_dim );
n_p_pad = bl2_align_dim_to_mult( bl2_obj_width( *p ), mult_n_dim );
elem_size_p = bl2_obj_elem_size( *p );
m_p_pad = bl2_align_dim_to_mult( bl2_obj_length( *p ), mult_m_dim );
n_p_pad = bl2_align_dim_to_mult( bl2_obj_width( *p ), mult_n_dim );
// Check the mem_t entry of p. If it is not yet allocated, then acquire
// a memory block of type pack_buf_type. If the mem_t object has already
// been allocated a buffer, then update the dimensions embedded in the
// object according to the latest values in m_p_pad and n_p_pad.
bl2_mem_alloc_update_m( m_p_pad,
n_p_pad,
elem_size_p,
pack_buf_type,
mem_p );
// Compute the size of the packed buffer.
size_p = m_p_pad * n_p_pad * bl2_obj_elem_size( *p );
if ( bl2_mem_is_unalloc( mem_p ) )
{
// If the mem_t object of p has not yet been allocated, then acquire
// a memory block of type pack_buf_type.
bl2_mem_acquire_m( size_p,
pack_buf_type,
mem_p );
}
else
{
// If the mem_t object is currently allocated and smaller than is
// needed, then something is very wrong, since the cache blocksizes
// that drive the level-3 blocked algorithms are the same ones that
// determine the sizes of the blocks within our memory allocator's
// memory pools. This branch should never be executed.
if ( bl2_mem_size( mem_p ) < size_p ) bl2_abort();
}
// Save the padded (packed) dimensions into the packed object. It is
// important to save these dimensions since they represent the actual
// dimensions of the zero-padded matrix.
bl2_obj_set_packed_dims( m_p_pad, n_p_pad, *p );
// Grab the buffer address from the mem_t object and copy it to the
// main object buffer field. (Sometimes this buffer address will be
@@ -250,24 +266,24 @@ void bl2_packm_init_pack( bool_t densify,
// Set the row and column strides of p based on the pack schema.
if ( pack_schema == BLIS_PACKED_ROWS )
{
// For regular row storage, the padded n dimension used when
// acquiring the pack memory should be used for our row stride,
// with the column stride set to one. By using the WIDTH of the mem_t
// region, we allow for zero-padding (if necessary/desired) along
// the right edge of the matrix.
rs_p = bl2_mem_width( mem_p );
// For regular row storage, the packed width of our mem_t region
// should be used for the row stride, with the column stride set
// to one. By using the WIDTH of the mem_t region, we allow for
// zero-padding (if necessary/desired) along the right edge of
// the matrix.
rs_p = bl2_obj_packed_width( *p );
cs_p = 1;
bl2_obj_set_incs( rs_p, cs_p, *p );
}
else if ( pack_schema == BLIS_PACKED_COLUMNS )
{
// For regular column storage, the padded m dimension used when
// acquiring the pack memory should be used for our column stride,
// with the row stride set to one. By using the LENGTH of the mem_t
// region, we allow for zero-padding (if necessary/desired) along
// the bottom edge of the matrix.
cs_p = bl2_mem_length( mem_p );
// For regular column storage, the packed length of our mem_t region
// should be used for the column stride, with the row stride set
// to one. By using the LENGTH of the mem_t region, we allow for
// zero-padding (if necessary/desired) along the bottom edge of
// the matrix.
cs_p = bl2_obj_packed_length( *p );
rs_p = 1;
bl2_obj_set_incs( rs_p, cs_p, *p );
@@ -292,11 +308,11 @@ void bl2_packm_init_pack( bool_t densify,
// The "panel stride" of a panel packed object is interpreted as the
// distance between the (0,0) element of panel k and the (0,0)
// element of panel k+1. We use the WIDTH of the mem_t region to
// determine the panel "width"; this will allow for zero-padding
// element of panel k+1. We use the WIDTH of the packed mem_t region
// to determine the panel "width"; this will allow for zero-padding
// (if necessary/desired) along the far end of each panel (ie: the
// right edge of the matrix).
ps_p = cs_p * bl2_mem_width( mem_p );
ps_p = cs_p * bl2_obj_packed_width( *p );
// Store the strides in p.
bl2_obj_set_incs( rs_p, cs_p, *p );
@@ -322,11 +338,11 @@ void bl2_packm_init_pack( bool_t densify,
// The "panel stride" of a panel packed object is interpreted as the
// distance between the (0,0) element of panel k and the (0,0)
// element of panel k+1. We use the LENGTH of the mem_t region to
// determine the panel "length"; this will allow for zero-padding
// element of panel k+1. We use the LENGTH of the packed mem_t region
// to determine the panel "length"; this will allow for zero-padding
// (if necessary/desired) along the far end of each panel (ie: the
// bottom edge of the matrix).
ps_p = bl2_mem_length( mem_p ) * rs_p;
ps_p = bl2_obj_packed_length( *p ) * rs_p;
// Store the strides in p.
bl2_obj_set_incs( rs_p, cs_p, *p );

View File

@@ -74,22 +74,23 @@ void bl2_packm_acquire_mpart_t2b( subpart_t requested_part,
// Modify offsets and dimensions of requested partition.
bl2_obj_set_dims( b, n, *sub_obj );
// Tweak the width of the pack_mem region of the subpartition to trick
// the underlying implementation into only zero-padding for the narrow
// submatrix of interest. Usually, the value we want is b (for non-edge
// cases), but at the edges, we want the remainder of the mem_t region
// in the m dimension. Edge cases are defined as occurring when i + b is
// exactly equal to the length of the parent object. In these cases, we
// arrive at the new pack_mem region width by simply subtracting off i.
// Tweak the packed length of the subpartition to trick the underlying
// implementation into only zero-padding for the narrow submatrix of
// interest. Usually, the value we want is b (for non-edge cases), but
// at the edges, we want the remainder of the mem_t region in the m
// dimension. Edge cases are defined as occurring when i + b is exactly
// equal to the inherited sub-object's length (which happens since the
// determine_blocksize function would have returned a smaller value of
// b for the edge iteration). In these cases, we arrive at the new
// packed length by simply subtracting off i.
{
mem_t* pack_mem = bl2_obj_pack_mem( *sub_obj );
dim_t m_max = bl2_mem_length( pack_mem );
dim_t m_mem;
dim_t m_pack_max = bl2_obj_packed_length( *sub_obj );
dim_t m_pack_cur;
if ( i + b == m ) m_mem = m_max - i;
else m_mem = b;
if ( i + b == m ) m_pack_cur = m_pack_max - i;
else m_pack_cur = b;
bl2_mem_set_length( m_mem, pack_mem );
bl2_obj_set_packed_length( m_pack_cur, *sub_obj );
}
// Translate the desired offsets to a panel offset and adjust the
@@ -97,13 +98,16 @@ void bl2_packm_acquire_mpart_t2b( subpart_t requested_part,
{
char* buf_p = bl2_obj_buffer( *sub_obj );
siz_t elem_size = bl2_obj_elem_size( *sub_obj );
inc_t cs_p = bl2_obj_col_stride( *sub_obj );
dim_t off_to_elem = i * cs_p;
dim_t off_to_panel = bl2_packm_offset_to_panel_for( i, sub_obj );
buf_p = buf_p + elem_size * off_to_elem;
buf_p = buf_p + elem_size * off_to_panel;
bl2_obj_set_buffer( ( void* )buf_p, *sub_obj );
}
// Don't have any code that utilizes this function yet. This abort is
// here to force someone to make sure the above works!
bl2_abort();
}
@@ -148,40 +152,23 @@ void bl2_packm_acquire_mpart_l2r( subpart_t requested_part,
// Modify offsets and dimensions of requested partition.
bl2_obj_set_dims( m, b, *sub_obj );
/* DON'T NEED THIS NOW THAT COPYING IS DONE IN _INIT_SUBPART_FROM().
// Copy the pack_mem and cast_mem entries.
// Tweak the packed width of the subpartition to trick the underlying
// implementation into only zero-padding for the narrow submatrix of
// interest. Usually, the value we want is b (for non-edge cases), but
// at the edges, we want the remainder of the mem_t region in the n
// dimension. Edge cases are defined as occurring when j + b is exactly
// equal to the inherited sub-object's width (which happens since the
// determine_blocksize function would have returned a smaller value of
// b for the edge iteration). In these cases, we arrive at the new
// packed width by simply subtracting off j.
{
mem_t* pack_mem = bl2_obj_pack_mem( *obj );
mem_t* cast_mem = bl2_obj_cast_mem( *obj );
dim_t n_pack_max = bl2_obj_packed_width( *sub_obj );
dim_t n_pack_cur;
bl2_obj_set_pack_mem( pack_mem, *sub_obj );
bl2_obj_set_cast_mem( cast_mem, *sub_obj );
}
if ( j + b == n ) n_pack_cur = n_pack_max - j;
else n_pack_cur = b;
// Copy the panel stride from the original object.
{
inc_t ps = bl2_obj_panel_stride( *obj );
bl2_obj_set_panel_stride( ps, *sub_obj );
}
*/
// Tweak the width of the pack_mem region of the subpartition to trick
// the underlying implementation into only zero-padding for the narrow
// submatrix of interest. Usually, the value we want is b (for non-edge
// cases), but at the edges, we want the remainder of the mem_t region
// in the n dimension. Edge cases are defined as occurring when j + b is
// exactly equal to the width of the parent object. In these cases, we
// arrive at the new pack_mem region width by simply subtracting off j.
{
mem_t* pack_mem = bl2_obj_pack_mem( *sub_obj );
dim_t n_max = bl2_mem_width( pack_mem );
dim_t n_mem;
if ( j + b == n ) n_mem = n_max - j;
else n_mem = b;
bl2_mem_set_width( n_mem, pack_mem );
bl2_obj_set_packed_width( n_pack_cur, *sub_obj );
}
// Translate the desired offsets to a panel offset and adjust the

View File

@@ -60,7 +60,6 @@ void bl2_packm_unb_var1( obj_t* beta,
obj_t* p )
{
num_t dt_cp = bl2_obj_datatype( *c );
mem_t* mem_p = bl2_obj_pack_mem( *p );
struc_t strucc = bl2_obj_struc( *c );
doff_t diagoffc = bl2_obj_diag_offset( *c );
@@ -71,8 +70,8 @@ void bl2_packm_unb_var1( obj_t* beta,
dim_t m_p = bl2_obj_length( *p );
dim_t n_p = bl2_obj_width( *p );
dim_t m_max_p = bl2_mem_length( mem_p );
dim_t n_max_p = bl2_mem_width( mem_p );
dim_t m_max_p = bl2_obj_packed_length( *p );
dim_t n_max_p = bl2_obj_packed_width( *p );
void* buf_c = bl2_obj_buffer_at_off( *c );
inc_t rs_c = bl2_obj_row_stride( *c );

View File

@@ -61,7 +61,6 @@ void bl2_packm_blk_var1( obj_t* beta,
packm_t* cntl )
{
num_t dt_cp = bl2_obj_datatype( *c );
mem_t* mem_p = bl2_obj_pack_mem( *p );
struc_t strucc = bl2_obj_struc( *c );
doff_t diagoffc = bl2_obj_diag_offset( *c );
@@ -72,8 +71,8 @@ void bl2_packm_blk_var1( obj_t* beta,
dim_t m_p = bl2_obj_length( *p );
dim_t n_p = bl2_obj_width( *p );
dim_t m_max_p = bl2_mem_length( mem_p );
dim_t n_max_p = bl2_mem_width( mem_p );
dim_t m_max_p = bl2_obj_packed_length( *p );
dim_t n_max_p = bl2_obj_packed_width( *p );
void* buf_c = bl2_obj_buffer_at_off( *c );
inc_t rs_c = bl2_obj_row_stride( *c );

View File

@@ -101,13 +101,10 @@ static char pool_mn_mem[ BLIS_MN_POOL_SIZE ];
void bl2_mem_acquire_m( dim_t m_req,
dim_t n_req,
siz_t elem_size,
void bl2_mem_acquire_m( siz_t req_size,
packbuf_t buf_type,
mem_t* mem )
{
siz_t req_size;
siz_t block_size;
dim_t pool_index;
pool_t* pool;
@@ -116,9 +113,6 @@ void bl2_mem_acquire_m( dim_t m_req,
int i;
// Compute the size of the requested contiguous memory region.
req_size = m_req * n_req * elem_size;
if ( buf_type == BLIS_BUFFER_FOR_GEN_USE )
{
// For general-use buffer requests, such as those used by level-2
@@ -128,17 +122,13 @@ void bl2_mem_acquire_m( dim_t m_req,
// Initialize the mem_t object with:
// - the address of the memory block,
// - the buffer type (a packbuf_t value),
// - the size of the requested region, and
// - the requested dimensions, which are presumably already aligned to
// dimension multiples (typically equal to register blocksizes).
// - the buffer type (a packbuf_t value), and
// - the size of the requested region.
// NOTE: We do not initialize the pool field since this block did not
// come from a contiguous memory pool.
bl2_mem_set_buffer( block, mem );
bl2_mem_set_buf_type( buf_type, mem );
bl2_mem_set_size( req_size, mem );
bl2_mem_set_dims( m_req, n_req, mem );
bl2_mem_set_elem_size( elem_size, mem );
}
else
{
@@ -197,17 +187,13 @@ void bl2_mem_acquire_m( dim_t m_req,
// Initialize the mem_t object with:
// - the address of the memory block,
// - the buffer type (a packbuf_t value),
// - the address of the memory pool to which it belongs,
// - the address of the memory pool to which it belongs, and
// - the size of the contiguous memory block (NOT the size of the
// requested region), and
// - the requested dimensions, which are presumably already aligned to
// dimension multiples (typically equal to register blocksizes).
// requested region).
bl2_mem_set_buffer( block, mem );
bl2_mem_set_buf_type( buf_type, mem );
bl2_mem_set_pool( pool, mem );
bl2_mem_set_size( block_size, mem );
bl2_mem_set_dims( m_req, n_req, mem );
bl2_mem_set_elem_size( elem_size, mem );
}
}
@@ -264,26 +250,20 @@ void bl2_mem_release( mem_t* mem )
// Clear the mem_t object so that it appears unallocated. We clear:
// - the buffer field,
// - the pool field,
// - the size field, and
// - the dimension fields.
// - the pool field, and
// - the size field.
// NOTE: We do not clear the buf_type field since there is no
// "uninitialized" value for packbuf_t.
bl2_mem_set_buffer( NULL, mem );
bl2_mem_set_pool( NULL, mem );
bl2_mem_set_size( 0, mem );
bl2_mem_set_dims( 0, 0, mem );
bl2_mem_set_elem_size( 0, mem );
}
void bl2_mem_acquire_v( dim_t m_req,
siz_t elem_size,
mem_t* mem )
void bl2_mem_acquire_v( siz_t req_size,
mem_t* mem )
{
bl2_mem_acquire_m( m_req,
1,
elem_size,
bl2_mem_acquire_m( req_size,
BLIS_BUFFER_FOR_GEN_USE,
mem );
}

View File

@@ -32,14 +32,11 @@
*/
void bl2_mem_acquire_m( dim_t m_req,
dim_t n_req,
siz_t elem_size,
void bl2_mem_acquire_m( siz_t req_size,
packbuf_t buf_type,
mem_t* mem );
void bl2_mem_acquire_v( dim_t m_req,
siz_t elem_size,
void bl2_mem_acquire_v( siz_t req_size,
mem_t* mem );
void bl2_mem_release( mem_t* mem );

View File

@@ -478,16 +478,12 @@ void bl2_obj_print( char* label, obj_t* obj )
fprintf( file, " elem size %lu\n", bl2_obj_elem_size( *obj ) );
fprintf( file, " rs, cs %lu, %lu\n", bl2_obj_row_stride( *obj ),
bl2_obj_col_stride( *obj ) );
//fprintf( file, " cast_mem \n" );
//fprintf( file, " - buf %p\n", bl2_mem_buffer( cast_mem ) );
//fprintf( file, " - m %lu\n", bl2_mem_length( cast_mem ) );
//fprintf( file, " - n %lu\n", bl2_mem_width( cast_mem ) );
fprintf( file, " pack_mem \n" );
fprintf( file, " - buf %p\n", bl2_mem_buffer( pack_mem ) );
fprintf( file, " - buf_type %u\n", bl2_mem_buf_type( pack_mem ) );
fprintf( file, " - size %lu\n", bl2_mem_size( pack_mem ) );
fprintf( file, " - m used %lu\n", bl2_mem_length( pack_mem ) );
fprintf( file, " - n used %lu\n", bl2_mem_width( pack_mem ) );
fprintf( file, " m_packed %lu\n", bl2_obj_packed_length( *obj ) );
fprintf( file, " n_packed %lu\n", bl2_obj_packed_width( *obj ) );
fprintf( file, " ps %lu\n", bl2_obj_panel_stride( *obj ) );
fprintf( file, "\n" );

View File

@@ -54,18 +54,6 @@
\
( (mem_p)->size )
#define bl2_mem_length( mem_p ) \
\
( (mem_p)->m )
#define bl2_mem_width( mem_p ) \
\
( (mem_p)->n )
#define bl2_mem_elem_size( mem_p ) \
\
( (mem_p)->elem_size )
#define bl2_mem_is_alloc( mem_p ) \
\
( bl2_mem_buffer( mem_p ) != NULL )
@@ -97,134 +85,5 @@
mem_p->size = size0; \
}
#define bl2_mem_set_length( m0, mem_p ) \
{ \
mem_p->m = m0; \
}
#define bl2_mem_set_width( n0, mem_p ) \
{ \
mem_p->n = n0; \
}
#define bl2_mem_set_elem_size( elem_size0, mem_p ) \
{ \
mem_p->elem_size = elem_size0; \
}
#define bl2_mem_set_dims( m0, n0, mem_p ) \
{ \
bl2_mem_set_length( m0, mem_p ); \
bl2_mem_set_width( n0, mem_p ); \
}
// Allocate a mem_t object if it is unallocated, or update its dimensions
// if it is allocated. This macro is used for matrices.
#define bl2_mem_alloc_update_m( m_padded, n_padded, elem_size, buf_type, mem_p ) \
{ \
bool_t needs_alloc; \
siz_t size_needed; \
\
if ( bl2_mem_is_unalloc( mem_p ) ) \
{ \
/* If the mem_t object is currently unallocated (NULL), mark it for
allocation. */ \
needs_alloc = TRUE; \
} \
else \
{ \
/* Compute the total buffer size needed. */ \
size_needed = m_padded * n_padded * elem_size; \
\
if ( size_needed <= bl2_mem_size( mem_p ) ) \
{ \
/* If the mem_t object is currently allocated, AND what is
allocated and available is equal to or greater than what is
needed, then set the dimensions according to how much we
need. This allows us to avoid unnecessarily releasing and
re-allocating when all we need is a subset of what is already
available. This case will occur when, for example, handling
both forward and backward edge cases. */ \
bl2_mem_set_dims( m_padded, n_padded, mem_p ); \
\
needs_alloc = FALSE; \
} \
else /* if ( bl2_mem_size( mem_p ) < size_needed ) */ \
{ \
/* If the mem_t object is currently allocated and smaller than is
needed, then something is very wrong, since the cache blocksizes
that drive the level-3 blocked algorithms are the same ones that
determine the sizes of the blocks within our memory allocator's
memory pools. This branch should never be executed. */ \
bl2_abort(); \
\
needs_alloc = FALSE; \
} \
} \
\
if ( needs_alloc ) \
{ \
bl2_mem_acquire_m( m_padded, \
n_padded, \
elem_size, \
buf_type, \
mem_p ); \
} \
} \
// Allocate a mem_t object if it is unallocated, or update its dimensions
// if it is allocated. This macro is used for vectors.
#define bl2_mem_alloc_update_v( m_padded, elem_size, mem_p ) \
{ \
bool_t needs_alloc; \
siz_t size_needed; \
\
if ( bl2_mem_is_unalloc( mem_p ) ) \
{ \
/* If the mem_t object is currently unallocated (NULL), mark it for
allocation. */ \
needs_alloc = TRUE; \
} \
else \
{ \
/* Compute the total buffer size needed. */ \
size_needed = m_padded * elem_size; \
\
if ( size_needed <= bl2_mem_size( mem_p ) ) \
{ \
/* If the mem_t object is currently allocated, AND what is
allocated and available is equal to or larger than what is
needed, then set the dimension according to how much we
need. This allows us to avoid unnecessarily releasing and
re-allocating when all we need is a subset of what is already
available. This case will occur when, for example, handling
both forward and backward edge cases. */ \
bl2_mem_set_dims( m_padded, 1, mem_p ); \
\
needs_alloc = FALSE; \
} \
else /* if ( bl2_mem_size( mem_p ) < size_needed ) */ \
{ \
/* If the mem_t object is currently allocated and smaller than is
needed, then release the memory and re-allocate. */ \
bl2_mem_release( mem_p ); \
\
needs_alloc = TRUE; \
} \
} \
\
if ( needs_alloc ) \
{ \
bl2_mem_acquire_v( m_padded, \
elem_size, \
mem_p ); \
} \
} \
#endif

View File

@@ -570,21 +570,6 @@ bl2_obj_width_stored( obj )
}
// Panel stride query
#define bl2_obj_panel_stride( obj ) \
\
((obj).ps)
// Panel stride modification
#define bl2_obj_set_panel_stride( panel_stride, obj ) \
{ \
(obj).ps = panel_stride; \
}
// Offset query
#define bl2_obj_row_offset( obj ) \
@@ -709,13 +694,13 @@ bl2_obj_width_stored( obj )
}
// Pack mem entry query
// Pack mem_t entry query
#define bl2_obj_pack_mem( obj ) \
\
( &((obj).pack_mem) )
// Pack mem entry modification
// Pack mem_t entry modification
#define bl2_obj_set_pack_mem( mem_p, obj ) \
{ \
@@ -723,6 +708,50 @@ bl2_obj_width_stored( obj )
}
// Packed dimensions query
#define bl2_obj_packed_length( obj ) \
\
( (obj).m_packed )
#define bl2_obj_packed_width( obj ) \
\
( (obj).n_packed )
// Packed dimensions modification
#define bl2_obj_set_packed_length( m0, obj ) \
{ \
(obj).m_packed = m0; \
}
#define bl2_obj_set_packed_width( n0, obj ) \
{ \
(obj).n_packed = n0; \
}
#define bl2_obj_set_packed_dims( m0, n0, obj ) \
{ \
bl2_obj_set_packed_length( m0, obj ); \
bl2_obj_set_packed_width( n0, obj ); \
}
// Packed panel stride query
#define bl2_obj_panel_stride( obj ) \
\
((obj).ps)
// Packed panel stride modification
#define bl2_obj_set_panel_stride( panel_stride, obj ) \
{ \
(obj).ps = panel_stride; \
}
/*
// Cast mem entry query
#define bl2_obj_cast_mem( obj ) \
@@ -735,48 +764,6 @@ bl2_obj_width_stored( obj )
{ \
(obj).cast_mem = *mem_p; \
}
/*
// Mem entry query
#define bl2_mem_buffer( mem_p ) \
\
( (mem_p)->buf )
#define bl2_mem_elem_size( mem_p ) \
\
( (mem_p)->elem_size )
#define bl2_mem_length( mem_p ) \
\
( (mem_p)->m )
#define bl2_mem_width( mem_p ) \
\
( (mem_p)->n )
// Mem entry modification
#define bl2_mem_set_buffer( buf0, mem_p ) \
{ \
mem_p->buf = buf0; \
}
#define bl2_mem_set_elem_size( elem_size0, mem_p ) \
{ \
mem_p->elem_size = elem_size0; \
}
#define bl2_mem_set_length( m0, mem_p ) \
{ \
mem_p->m = m0; \
}
#define bl2_mem_set_width( n0, mem_p ) \
{ \
mem_p->n = n0; \
}
*/

View File

@@ -275,9 +275,6 @@ typedef struct mem_s
packbuf_t buf_type;
pool_t* pool;
siz_t size;
dim_t m;
dim_t n;
siz_t elem_size;
} mem_t;
// Blocksize object type
@@ -376,9 +373,12 @@ typedef struct obj_s
// Pack-related fields
mem_t pack_mem; // cached memory region for packing
//mem_t cast_mem; // cached memory region for casting
dim_t m_packed;
dim_t n_packed;
inc_t ps; // panel stride (distance to next panel)
//mem_t cast_mem; // cached memory region for casting
} obj_t;
@@ -402,10 +402,10 @@ typedef struct obj_s
(b).cs = (a).cs; \
\
/* We must NOT copy pack_mem field since this macro forms the basis of
bl2_obj_alias_to(), which is used in packm. There, we want to copy
over the basic fields of the obj_t but PRESERVE the pack_mem field
of the destination object since it holds the cached mem_t buffer
(and dimensions). */ \
bl2_obj_alias_to(), which is used in packm_init(). There, we want to
copy the basic fields of the obj_t but PRESERVE the pack_mem field
(and the corresponding dimensions and stride) of the destination
object since it holds the cached mem_t object and buffer. */ \
}
#define bl2_obj_init_subpart_from( a, b ) \
@@ -428,10 +428,13 @@ typedef struct obj_s
/* We want to copy the pack_mem field here because this macro is used
when creating subpartitions, including those of packed objects. In
those situations, we want the subpartition to inherit the pack_mem
field of its parent. */ \
field, and the corresponding packed dimensions, of its parent. */ \
(b).pack_mem = (a).pack_mem; \
/*(b).cast_mem = (a).cast_mem;*/ \
(b).m_packed = (a).m_packed; \
(b).n_packed = (a).n_packed; \
(b).ps = (a).ps; \
\
/*(b).cast_mem = (a).cast_mem;*/ \
}