mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Relocated packed mem_t dimension fields to obj_t.
Details: - Removed the m and n (and elem_size) fields from the mem_t object, and added m_packed and n_packed fields to obj_t. These new fields track the same values as the old ones did. From an abstraction standpoint, it seemed awkward to store those dimensions inside the mem_t. - Updated interfaces to bl2_mem_acquire_*() so that only a byte size argument is passed in, instead of m, n, and elem_size. - Updated bl2_packm_init_pack() and bl2_packv_init_pack() to inline the functionality of bl2_mem_alloc_update_m() and bl2_mem_alloc_update_v(), respectively. - Updated packm variants to access the packed length and width fields from their new locations.
This commit is contained in:
@@ -126,7 +126,7 @@ void bl2_packv_init_pack( pack_t pack_schema,
|
||||
|
||||
mem_t* mem_p;
|
||||
dim_t m_p_pad;
|
||||
siz_t elem_size_p;
|
||||
siz_t size_p;
|
||||
inc_t rs_p, cs_p;
|
||||
void* buf;
|
||||
|
||||
@@ -149,16 +149,33 @@ void bl2_packv_init_pack( pack_t pack_schema,
|
||||
mem_p = bl2_obj_pack_mem( *p );
|
||||
|
||||
// Compute the dimensions padded by the dimension multiples.
|
||||
m_p_pad = bl2_align_dim_to_mult( bl2_obj_vector_dim( *p ), mult_m_dim );
|
||||
elem_size_p = bl2_obj_elem_size( *p );
|
||||
m_p_pad = bl2_align_dim_to_mult( bl2_obj_vector_dim( *p ), mult_m_dim );
|
||||
|
||||
// Check the mem_t entry of p. If it is not yet allocated, then acquire
|
||||
// a memory block suitable for a vector. If the mem_t object has already
|
||||
// been allocated a buffer, then update the dimensions embedded in the
|
||||
// object according to the latest value in m_p_pad.
|
||||
bl2_mem_alloc_update_v( m_p_pad,
|
||||
elem_size_p,
|
||||
mem_p );
|
||||
// Compute the size of the packed buffer.
|
||||
size_p = m_p_pad * 1 * bl2_obj_elem_size( *p );
|
||||
|
||||
if ( bl2_mem_is_unalloc( mem_p ) )
|
||||
{
|
||||
// If the mem_t object of p has not yet been allocated, then acquire
|
||||
// a memory block suitable for a vector.
|
||||
bl2_mem_acquire_v( size_p,
|
||||
mem_p );
|
||||
}
|
||||
else
|
||||
{
|
||||
// If the mem_t object has already been allocated, then release and
|
||||
// re-acquire the memory so there is sufficient space.
|
||||
if ( bl2_mem_size( mem_p ) < size_p )
|
||||
{
|
||||
bl2_mem_release( mem_p );
|
||||
|
||||
bl2_mem_acquire_v( size_p,
|
||||
mem_p );
|
||||
}
|
||||
}
|
||||
|
||||
// Save the padded (packed) dimensions into the packed object.
|
||||
bl2_obj_set_packed_dims( m_p_pad, 1, *p );
|
||||
|
||||
// Grab the buffer address from the mem_t object and copy it to the
|
||||
// main object buffer field. (Sometimes this buffer address will be
|
||||
@@ -176,7 +193,7 @@ void bl2_packv_init_pack( pack_t pack_schema,
|
||||
// how much space beyond the vector would need to be zero-padded, if
|
||||
// zero-padding was needed.
|
||||
rs_p = 1;
|
||||
cs_p = bl2_mem_length( mem_p );
|
||||
cs_p = bl2_obj_packed_length( *p );
|
||||
|
||||
bl2_obj_set_incs( rs_p, cs_p, *p );
|
||||
}
|
||||
|
||||
@@ -59,7 +59,6 @@ void bl2_packm_blk_var2( obj_t* beta,
|
||||
obj_t* p )
|
||||
{
|
||||
num_t dt_cp = bl2_obj_datatype( *c );
|
||||
mem_t* mem_p = bl2_obj_pack_mem( *p );
|
||||
|
||||
struc_t strucc = bl2_obj_struc( *c );
|
||||
doff_t diagoffc = bl2_obj_diag_offset( *c );
|
||||
@@ -69,8 +68,8 @@ void bl2_packm_blk_var2( obj_t* beta,
|
||||
|
||||
dim_t m_p = bl2_obj_length( *p );
|
||||
dim_t n_p = bl2_obj_width( *p );
|
||||
dim_t m_max_p = bl2_mem_length( mem_p );
|
||||
dim_t n_max_p = bl2_mem_width( mem_p );
|
||||
dim_t m_max_p = bl2_obj_packed_length( *p );
|
||||
dim_t n_max_p = bl2_obj_packed_width( *p );
|
||||
|
||||
void* buf_c = bl2_obj_buffer_at_off( *c );
|
||||
inc_t rs_c = bl2_obj_row_stride( *c );
|
||||
|
||||
@@ -62,7 +62,6 @@ void bl2_packm_blk_var3( obj_t* beta,
|
||||
obj_t* p )
|
||||
{
|
||||
num_t dt_cp = bl2_obj_datatype( *c );
|
||||
mem_t* mem_p = bl2_obj_pack_mem( *p );
|
||||
|
||||
struc_t strucc = bl2_obj_struc( *c );
|
||||
doff_t diagoffc = bl2_obj_diag_offset( *c );
|
||||
@@ -75,8 +74,8 @@ void bl2_packm_blk_var3( obj_t* beta,
|
||||
|
||||
dim_t m_p = bl2_obj_length( *p );
|
||||
dim_t n_p = bl2_obj_width( *p );
|
||||
dim_t m_max_p = bl2_mem_length( mem_p );
|
||||
dim_t n_max_p = bl2_mem_width( mem_p );
|
||||
dim_t m_max_p = bl2_obj_packed_length( *p );
|
||||
dim_t n_max_p = bl2_obj_packed_width( *p );
|
||||
|
||||
void* buf_c = bl2_obj_buffer_at_off( *c );
|
||||
inc_t rs_c = bl2_obj_row_stride( *c );
|
||||
|
||||
@@ -174,7 +174,7 @@ void bl2_packm_init_pack( bool_t densify,
|
||||
|
||||
mem_t* mem_p;
|
||||
dim_t m_p_pad, n_p_pad;
|
||||
siz_t elem_size_p;
|
||||
siz_t size_p;
|
||||
inc_t rs_p, cs_p;
|
||||
void* buf;
|
||||
|
||||
@@ -220,24 +220,40 @@ void bl2_packm_init_pack( bool_t densify,
|
||||
mem_p = bl2_obj_pack_mem( *p );
|
||||
|
||||
// Compute the dimensions padded by the dimension multiples. These
|
||||
// dimensions are those that the macro- and micro-kernels will use.
|
||||
// dimensions represent the dimensions of the packed matrices, including
|
||||
// zero-padding, and will be used by the macro- and micro-kernels.
|
||||
// We compute them by starting with the effective dimensions of c (now
|
||||
// in p) and aligning them to the dimension multiples (typically equal
|
||||
// to register blocksizes). This does waste a little bit of space for
|
||||
// level-2 operations, but that's okay with us.
|
||||
m_p_pad = bl2_align_dim_to_mult( bl2_obj_length( *p ), mult_m_dim );
|
||||
n_p_pad = bl2_align_dim_to_mult( bl2_obj_width( *p ), mult_n_dim );
|
||||
elem_size_p = bl2_obj_elem_size( *p );
|
||||
m_p_pad = bl2_align_dim_to_mult( bl2_obj_length( *p ), mult_m_dim );
|
||||
n_p_pad = bl2_align_dim_to_mult( bl2_obj_width( *p ), mult_n_dim );
|
||||
|
||||
// Check the mem_t entry of p. If it is not yet allocated, then acquire
|
||||
// a memory block of type pack_buf_type. If the mem_t object has already
|
||||
// been allocated a buffer, then update the dimensions embedded in the
|
||||
// object according to the latest values in m_p_pad and n_p_pad.
|
||||
bl2_mem_alloc_update_m( m_p_pad,
|
||||
n_p_pad,
|
||||
elem_size_p,
|
||||
pack_buf_type,
|
||||
mem_p );
|
||||
// Compute the size of the packed buffer.
|
||||
size_p = m_p_pad * n_p_pad * bl2_obj_elem_size( *p );
|
||||
|
||||
if ( bl2_mem_is_unalloc( mem_p ) )
|
||||
{
|
||||
// If the mem_t object of p has not yet been allocated, then acquire
|
||||
// a memory block of type pack_buf_type.
|
||||
bl2_mem_acquire_m( size_p,
|
||||
pack_buf_type,
|
||||
mem_p );
|
||||
}
|
||||
else
|
||||
{
|
||||
// If the mem_t object is currently allocated and smaller than is
|
||||
// needed, then something is very wrong, since the cache blocksizes
|
||||
// that drive the level-3 blocked algorithms are the same ones that
|
||||
// determine the sizes of the blocks within our memory allocator's
|
||||
// memory pools. This branch should never be executed.
|
||||
if ( bl2_mem_size( mem_p ) < size_p ) bl2_abort();
|
||||
}
|
||||
|
||||
// Save the padded (packed) dimensions into the packed object. It is
|
||||
// important to save these dimensions since they represent the actual
|
||||
// dimensions of the zero-padded matrix.
|
||||
bl2_obj_set_packed_dims( m_p_pad, n_p_pad, *p );
|
||||
|
||||
// Grab the buffer address from the mem_t object and copy it to the
|
||||
// main object buffer field. (Sometimes this buffer address will be
|
||||
@@ -250,24 +266,24 @@ void bl2_packm_init_pack( bool_t densify,
|
||||
// Set the row and column strides of p based on the pack schema.
|
||||
if ( pack_schema == BLIS_PACKED_ROWS )
|
||||
{
|
||||
// For regular row storage, the padded n dimension used when
|
||||
// acquiring the pack memory should be used for our row stride,
|
||||
// with the column stride set to one. By using the WIDTH of the mem_t
|
||||
// region, we allow for zero-padding (if necessary/desired) along
|
||||
// the right edge of the matrix.
|
||||
rs_p = bl2_mem_width( mem_p );
|
||||
// For regular row storage, the packed width of our mem_t region
|
||||
// should be used for the row stride, with the column stride set
|
||||
// to one. By using the WIDTH of the mem_t region, we allow for
|
||||
// zero-padding (if necessary/desired) along the right edge of
|
||||
// the matrix.
|
||||
rs_p = bl2_obj_packed_width( *p );
|
||||
cs_p = 1;
|
||||
|
||||
bl2_obj_set_incs( rs_p, cs_p, *p );
|
||||
}
|
||||
else if ( pack_schema == BLIS_PACKED_COLUMNS )
|
||||
{
|
||||
// For regular column storage, the padded m dimension used when
|
||||
// acquiring the pack memory should be used for our column stride,
|
||||
// with the row stride set to one. By using the LENGTH of the mem_t
|
||||
// region, we allow for zero-padding (if necessary/desired) along
|
||||
// the bottom edge of the matrix.
|
||||
cs_p = bl2_mem_length( mem_p );
|
||||
// For regular column storage, the packed length of our mem_t region
|
||||
// should be used for the column stride, with the row stride set
|
||||
// to one. By using the LENGTH of the mem_t region, we allow for
|
||||
// zero-padding (if necessary/desired) along the bottom edge of
|
||||
// the matrix.
|
||||
cs_p = bl2_obj_packed_length( *p );
|
||||
rs_p = 1;
|
||||
|
||||
bl2_obj_set_incs( rs_p, cs_p, *p );
|
||||
@@ -292,11 +308,11 @@ void bl2_packm_init_pack( bool_t densify,
|
||||
|
||||
// The "panel stride" of a panel packed object is interpreted as the
|
||||
// distance between the (0,0) element of panel k and the (0,0)
|
||||
// element of panel k+1. We use the WIDTH of the mem_t region to
|
||||
// determine the panel "width"; this will allow for zero-padding
|
||||
// element of panel k+1. We use the WIDTH of the packed mem_t region
|
||||
// to determine the panel "width"; this will allow for zero-padding
|
||||
// (if necessary/desired) along the far end of each panel (ie: the
|
||||
// right edge of the matrix).
|
||||
ps_p = cs_p * bl2_mem_width( mem_p );
|
||||
ps_p = cs_p * bl2_obj_packed_width( *p );
|
||||
|
||||
// Store the strides in p.
|
||||
bl2_obj_set_incs( rs_p, cs_p, *p );
|
||||
@@ -322,11 +338,11 @@ void bl2_packm_init_pack( bool_t densify,
|
||||
|
||||
// The "panel stride" of a panel packed object is interpreted as the
|
||||
// distance between the (0,0) element of panel k and the (0,0)
|
||||
// element of panel k+1. We use the LENGTH of the mem_t region to
|
||||
// determine the panel "length"; this will allow for zero-padding
|
||||
// element of panel k+1. We use the LENGTH of the packed mem_t region
|
||||
// to determine the panel "length"; this will allow for zero-padding
|
||||
// (if necessary/desired) along the far end of each panel (ie: the
|
||||
// bottom edge of the matrix).
|
||||
ps_p = bl2_mem_length( mem_p ) * rs_p;
|
||||
ps_p = bl2_obj_packed_length( *p ) * rs_p;
|
||||
|
||||
// Store the strides in p.
|
||||
bl2_obj_set_incs( rs_p, cs_p, *p );
|
||||
|
||||
@@ -74,22 +74,23 @@ void bl2_packm_acquire_mpart_t2b( subpart_t requested_part,
|
||||
// Modify offsets and dimensions of requested partition.
|
||||
bl2_obj_set_dims( b, n, *sub_obj );
|
||||
|
||||
// Tweak the width of the pack_mem region of the subpartition to trick
|
||||
// the underlying implementation into only zero-padding for the narrow
|
||||
// submatrix of interest. Usually, the value we want is b (for non-edge
|
||||
// cases), but at the edges, we want the remainder of the mem_t region
|
||||
// in the m dimension. Edge cases are defined as occurring when i + b is
|
||||
// exactly equal to the length of the parent object. In these cases, we
|
||||
// arrive at the new pack_mem region width by simply subtracting off i.
|
||||
// Tweak the packed length of the subpartition to trick the underlying
|
||||
// implementation into only zero-padding for the narrow submatrix of
|
||||
// interest. Usually, the value we want is b (for non-edge cases), but
|
||||
// at the edges, we want the remainder of the mem_t region in the m
|
||||
// dimension. Edge cases are defined as occurring when i + b is exactly
|
||||
// equal to the inherited sub-object's length (which happens since the
|
||||
// determine_blocksize function would have returned a smaller value of
|
||||
// b for the edge iteration). In these cases, we arrive at the new
|
||||
// packed length by simply subtracting off i.
|
||||
{
|
||||
mem_t* pack_mem = bl2_obj_pack_mem( *sub_obj );
|
||||
dim_t m_max = bl2_mem_length( pack_mem );
|
||||
dim_t m_mem;
|
||||
dim_t m_pack_max = bl2_obj_packed_length( *sub_obj );
|
||||
dim_t m_pack_cur;
|
||||
|
||||
if ( i + b == m ) m_mem = m_max - i;
|
||||
else m_mem = b;
|
||||
if ( i + b == m ) m_pack_cur = m_pack_max - i;
|
||||
else m_pack_cur = b;
|
||||
|
||||
bl2_mem_set_length( m_mem, pack_mem );
|
||||
bl2_obj_set_packed_length( m_pack_cur, *sub_obj );
|
||||
}
|
||||
|
||||
// Translate the desired offsets to a panel offset and adjust the
|
||||
@@ -97,13 +98,16 @@ void bl2_packm_acquire_mpart_t2b( subpart_t requested_part,
|
||||
{
|
||||
char* buf_p = bl2_obj_buffer( *sub_obj );
|
||||
siz_t elem_size = bl2_obj_elem_size( *sub_obj );
|
||||
inc_t cs_p = bl2_obj_col_stride( *sub_obj );
|
||||
dim_t off_to_elem = i * cs_p;
|
||||
dim_t off_to_panel = bl2_packm_offset_to_panel_for( i, sub_obj );
|
||||
|
||||
buf_p = buf_p + elem_size * off_to_elem;
|
||||
buf_p = buf_p + elem_size * off_to_panel;
|
||||
|
||||
bl2_obj_set_buffer( ( void* )buf_p, *sub_obj );
|
||||
}
|
||||
|
||||
// Don't have any code that utilizes this function yet. This abort is
|
||||
// here to force someone to make sure the above works!
|
||||
bl2_abort();
|
||||
}
|
||||
|
||||
|
||||
@@ -148,40 +152,23 @@ void bl2_packm_acquire_mpart_l2r( subpart_t requested_part,
|
||||
// Modify offsets and dimensions of requested partition.
|
||||
bl2_obj_set_dims( m, b, *sub_obj );
|
||||
|
||||
/* DON'T NEED THIS NOW THAT COPYING IS DONE IN _INIT_SUBPART_FROM().
|
||||
// Copy the pack_mem and cast_mem entries.
|
||||
// Tweak the packed width of the subpartition to trick the underlying
|
||||
// implementation into only zero-padding for the narrow submatrix of
|
||||
// interest. Usually, the value we want is b (for non-edge cases), but
|
||||
// at the edges, we want the remainder of the mem_t region in the n
|
||||
// dimension. Edge cases are defined as occurring when j + b is exactly
|
||||
// equal to the inherited sub-object's width (which happens since the
|
||||
// determine_blocksize function would have returned a smaller value of
|
||||
// b for the edge iteration). In these cases, we arrive at the new
|
||||
// packed width by simply subtracting off j.
|
||||
{
|
||||
mem_t* pack_mem = bl2_obj_pack_mem( *obj );
|
||||
mem_t* cast_mem = bl2_obj_cast_mem( *obj );
|
||||
dim_t n_pack_max = bl2_obj_packed_width( *sub_obj );
|
||||
dim_t n_pack_cur;
|
||||
|
||||
bl2_obj_set_pack_mem( pack_mem, *sub_obj );
|
||||
bl2_obj_set_cast_mem( cast_mem, *sub_obj );
|
||||
}
|
||||
if ( j + b == n ) n_pack_cur = n_pack_max - j;
|
||||
else n_pack_cur = b;
|
||||
|
||||
// Copy the panel stride from the original object.
|
||||
{
|
||||
inc_t ps = bl2_obj_panel_stride( *obj );
|
||||
|
||||
bl2_obj_set_panel_stride( ps, *sub_obj );
|
||||
}
|
||||
*/
|
||||
|
||||
// Tweak the width of the pack_mem region of the subpartition to trick
|
||||
// the underlying implementation into only zero-padding for the narrow
|
||||
// submatrix of interest. Usually, the value we want is b (for non-edge
|
||||
// cases), but at the edges, we want the remainder of the mem_t region
|
||||
// in the n dimension. Edge cases are defined as occurring when j + b is
|
||||
// exactly equal to the width of the parent object. In these cases, we
|
||||
// arrive at the new pack_mem region width by simply subtracting off j.
|
||||
{
|
||||
mem_t* pack_mem = bl2_obj_pack_mem( *sub_obj );
|
||||
dim_t n_max = bl2_mem_width( pack_mem );
|
||||
dim_t n_mem;
|
||||
|
||||
if ( j + b == n ) n_mem = n_max - j;
|
||||
else n_mem = b;
|
||||
|
||||
bl2_mem_set_width( n_mem, pack_mem );
|
||||
bl2_obj_set_packed_width( n_pack_cur, *sub_obj );
|
||||
}
|
||||
|
||||
// Translate the desired offsets to a panel offset and adjust the
|
||||
|
||||
@@ -60,7 +60,6 @@ void bl2_packm_unb_var1( obj_t* beta,
|
||||
obj_t* p )
|
||||
{
|
||||
num_t dt_cp = bl2_obj_datatype( *c );
|
||||
mem_t* mem_p = bl2_obj_pack_mem( *p );
|
||||
|
||||
struc_t strucc = bl2_obj_struc( *c );
|
||||
doff_t diagoffc = bl2_obj_diag_offset( *c );
|
||||
@@ -71,8 +70,8 @@ void bl2_packm_unb_var1( obj_t* beta,
|
||||
|
||||
dim_t m_p = bl2_obj_length( *p );
|
||||
dim_t n_p = bl2_obj_width( *p );
|
||||
dim_t m_max_p = bl2_mem_length( mem_p );
|
||||
dim_t n_max_p = bl2_mem_width( mem_p );
|
||||
dim_t m_max_p = bl2_obj_packed_length( *p );
|
||||
dim_t n_max_p = bl2_obj_packed_width( *p );
|
||||
|
||||
void* buf_c = bl2_obj_buffer_at_off( *c );
|
||||
inc_t rs_c = bl2_obj_row_stride( *c );
|
||||
|
||||
@@ -61,7 +61,6 @@ void bl2_packm_blk_var1( obj_t* beta,
|
||||
packm_t* cntl )
|
||||
{
|
||||
num_t dt_cp = bl2_obj_datatype( *c );
|
||||
mem_t* mem_p = bl2_obj_pack_mem( *p );
|
||||
|
||||
struc_t strucc = bl2_obj_struc( *c );
|
||||
doff_t diagoffc = bl2_obj_diag_offset( *c );
|
||||
@@ -72,8 +71,8 @@ void bl2_packm_blk_var1( obj_t* beta,
|
||||
|
||||
dim_t m_p = bl2_obj_length( *p );
|
||||
dim_t n_p = bl2_obj_width( *p );
|
||||
dim_t m_max_p = bl2_mem_length( mem_p );
|
||||
dim_t n_max_p = bl2_mem_width( mem_p );
|
||||
dim_t m_max_p = bl2_obj_packed_length( *p );
|
||||
dim_t n_max_p = bl2_obj_packed_width( *p );
|
||||
|
||||
void* buf_c = bl2_obj_buffer_at_off( *c );
|
||||
inc_t rs_c = bl2_obj_row_stride( *c );
|
||||
|
||||
@@ -101,13 +101,10 @@ static char pool_mn_mem[ BLIS_MN_POOL_SIZE ];
|
||||
|
||||
|
||||
|
||||
void bl2_mem_acquire_m( dim_t m_req,
|
||||
dim_t n_req,
|
||||
siz_t elem_size,
|
||||
void bl2_mem_acquire_m( siz_t req_size,
|
||||
packbuf_t buf_type,
|
||||
mem_t* mem )
|
||||
{
|
||||
siz_t req_size;
|
||||
siz_t block_size;
|
||||
dim_t pool_index;
|
||||
pool_t* pool;
|
||||
@@ -116,9 +113,6 @@ void bl2_mem_acquire_m( dim_t m_req,
|
||||
int i;
|
||||
|
||||
|
||||
// Compute the size of the requested contiguous memory region.
|
||||
req_size = m_req * n_req * elem_size;
|
||||
|
||||
if ( buf_type == BLIS_BUFFER_FOR_GEN_USE )
|
||||
{
|
||||
// For general-use buffer requests, such as those used by level-2
|
||||
@@ -128,17 +122,13 @@ void bl2_mem_acquire_m( dim_t m_req,
|
||||
|
||||
// Initialize the mem_t object with:
|
||||
// - the address of the memory block,
|
||||
// - the buffer type (a packbuf_t value),
|
||||
// - the size of the requested region, and
|
||||
// - the requested dimensions, which are presumably already aligned to
|
||||
// dimension multiples (typically equal to register blocksizes).
|
||||
// - the buffer type (a packbuf_t value), and
|
||||
// - the size of the requested region.
|
||||
// NOTE: We do not initialize the pool field since this block did not
|
||||
// come from a contiguous memory pool.
|
||||
bl2_mem_set_buffer( block, mem );
|
||||
bl2_mem_set_buf_type( buf_type, mem );
|
||||
bl2_mem_set_size( req_size, mem );
|
||||
bl2_mem_set_dims( m_req, n_req, mem );
|
||||
bl2_mem_set_elem_size( elem_size, mem );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -197,17 +187,13 @@ void bl2_mem_acquire_m( dim_t m_req,
|
||||
// Initialize the mem_t object with:
|
||||
// - the address of the memory block,
|
||||
// - the buffer type (a packbuf_t value),
|
||||
// - the address of the memory pool to which it belongs,
|
||||
// - the address of the memory pool to which it belongs, and
|
||||
// - the size of the contiguous memory block (NOT the size of the
|
||||
// requested region), and
|
||||
// - the requested dimensions, which are presumably already aligned to
|
||||
// dimension multiples (typically equal to register blocksizes).
|
||||
// requested region).
|
||||
bl2_mem_set_buffer( block, mem );
|
||||
bl2_mem_set_buf_type( buf_type, mem );
|
||||
bl2_mem_set_pool( pool, mem );
|
||||
bl2_mem_set_size( block_size, mem );
|
||||
bl2_mem_set_dims( m_req, n_req, mem );
|
||||
bl2_mem_set_elem_size( elem_size, mem );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -264,26 +250,20 @@ void bl2_mem_release( mem_t* mem )
|
||||
|
||||
// Clear the mem_t object so that it appears unallocated. We clear:
|
||||
// - the buffer field,
|
||||
// - the pool field,
|
||||
// - the size field, and
|
||||
// - the dimension fields.
|
||||
// - the pool field, and
|
||||
// - the size field.
|
||||
// NOTE: We do not clear the buf_type field since there is no
|
||||
// "uninitialized" value for packbuf_t.
|
||||
bl2_mem_set_buffer( NULL, mem );
|
||||
bl2_mem_set_pool( NULL, mem );
|
||||
bl2_mem_set_size( 0, mem );
|
||||
bl2_mem_set_dims( 0, 0, mem );
|
||||
bl2_mem_set_elem_size( 0, mem );
|
||||
}
|
||||
|
||||
|
||||
void bl2_mem_acquire_v( dim_t m_req,
|
||||
siz_t elem_size,
|
||||
mem_t* mem )
|
||||
void bl2_mem_acquire_v( siz_t req_size,
|
||||
mem_t* mem )
|
||||
{
|
||||
bl2_mem_acquire_m( m_req,
|
||||
1,
|
||||
elem_size,
|
||||
bl2_mem_acquire_m( req_size,
|
||||
BLIS_BUFFER_FOR_GEN_USE,
|
||||
mem );
|
||||
}
|
||||
|
||||
@@ -32,14 +32,11 @@
|
||||
|
||||
*/
|
||||
|
||||
void bl2_mem_acquire_m( dim_t m_req,
|
||||
dim_t n_req,
|
||||
siz_t elem_size,
|
||||
void bl2_mem_acquire_m( siz_t req_size,
|
||||
packbuf_t buf_type,
|
||||
mem_t* mem );
|
||||
|
||||
void bl2_mem_acquire_v( dim_t m_req,
|
||||
siz_t elem_size,
|
||||
void bl2_mem_acquire_v( siz_t req_size,
|
||||
mem_t* mem );
|
||||
|
||||
void bl2_mem_release( mem_t* mem );
|
||||
|
||||
@@ -478,16 +478,12 @@ void bl2_obj_print( char* label, obj_t* obj )
|
||||
fprintf( file, " elem size %lu\n", bl2_obj_elem_size( *obj ) );
|
||||
fprintf( file, " rs, cs %lu, %lu\n", bl2_obj_row_stride( *obj ),
|
||||
bl2_obj_col_stride( *obj ) );
|
||||
//fprintf( file, " cast_mem \n" );
|
||||
//fprintf( file, " - buf %p\n", bl2_mem_buffer( cast_mem ) );
|
||||
//fprintf( file, " - m %lu\n", bl2_mem_length( cast_mem ) );
|
||||
//fprintf( file, " - n %lu\n", bl2_mem_width( cast_mem ) );
|
||||
fprintf( file, " pack_mem \n" );
|
||||
fprintf( file, " - buf %p\n", bl2_mem_buffer( pack_mem ) );
|
||||
fprintf( file, " - buf_type %u\n", bl2_mem_buf_type( pack_mem ) );
|
||||
fprintf( file, " - size %lu\n", bl2_mem_size( pack_mem ) );
|
||||
fprintf( file, " - m used %lu\n", bl2_mem_length( pack_mem ) );
|
||||
fprintf( file, " - n used %lu\n", bl2_mem_width( pack_mem ) );
|
||||
fprintf( file, " m_packed %lu\n", bl2_obj_packed_length( *obj ) );
|
||||
fprintf( file, " n_packed %lu\n", bl2_obj_packed_width( *obj ) );
|
||||
fprintf( file, " ps %lu\n", bl2_obj_panel_stride( *obj ) );
|
||||
fprintf( file, "\n" );
|
||||
|
||||
|
||||
@@ -54,18 +54,6 @@
|
||||
\
|
||||
( (mem_p)->size )
|
||||
|
||||
#define bl2_mem_length( mem_p ) \
|
||||
\
|
||||
( (mem_p)->m )
|
||||
|
||||
#define bl2_mem_width( mem_p ) \
|
||||
\
|
||||
( (mem_p)->n )
|
||||
|
||||
#define bl2_mem_elem_size( mem_p ) \
|
||||
\
|
||||
( (mem_p)->elem_size )
|
||||
|
||||
#define bl2_mem_is_alloc( mem_p ) \
|
||||
\
|
||||
( bl2_mem_buffer( mem_p ) != NULL )
|
||||
@@ -97,134 +85,5 @@
|
||||
mem_p->size = size0; \
|
||||
}
|
||||
|
||||
#define bl2_mem_set_length( m0, mem_p ) \
|
||||
{ \
|
||||
mem_p->m = m0; \
|
||||
}
|
||||
|
||||
#define bl2_mem_set_width( n0, mem_p ) \
|
||||
{ \
|
||||
mem_p->n = n0; \
|
||||
}
|
||||
|
||||
#define bl2_mem_set_elem_size( elem_size0, mem_p ) \
|
||||
{ \
|
||||
mem_p->elem_size = elem_size0; \
|
||||
}
|
||||
|
||||
#define bl2_mem_set_dims( m0, n0, mem_p ) \
|
||||
{ \
|
||||
bl2_mem_set_length( m0, mem_p ); \
|
||||
bl2_mem_set_width( n0, mem_p ); \
|
||||
}
|
||||
|
||||
|
||||
// Allocate a mem_t object if it is unallocated, or update its dimensions
|
||||
// if it is allocated. This macro is used for matrices.
|
||||
|
||||
#define bl2_mem_alloc_update_m( m_padded, n_padded, elem_size, buf_type, mem_p ) \
|
||||
{ \
|
||||
bool_t needs_alloc; \
|
||||
siz_t size_needed; \
|
||||
\
|
||||
if ( bl2_mem_is_unalloc( mem_p ) ) \
|
||||
{ \
|
||||
/* If the mem_t object is currently unallocated (NULL), mark it for
|
||||
allocation. */ \
|
||||
needs_alloc = TRUE; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
/* Compute the total buffer size needed. */ \
|
||||
size_needed = m_padded * n_padded * elem_size; \
|
||||
\
|
||||
if ( size_needed <= bl2_mem_size( mem_p ) ) \
|
||||
{ \
|
||||
/* If the mem_t object is currently allocated, AND what is
|
||||
allocated and available is equal to or greater than what is
|
||||
needed, then set the dimensions according to how much we
|
||||
need. This allows us to avoid unnecessarily releasing and
|
||||
re-allocating when all we need is a subset of what is already
|
||||
available. This case will occur when, for example, handling
|
||||
both forward and backward edge cases. */ \
|
||||
bl2_mem_set_dims( m_padded, n_padded, mem_p ); \
|
||||
\
|
||||
needs_alloc = FALSE; \
|
||||
} \
|
||||
else /* if ( bl2_mem_size( mem_p ) < size_needed ) */ \
|
||||
{ \
|
||||
/* If the mem_t object is currently allocated and smaller than is
|
||||
needed, then something is very wrong, since the cache blocksizes
|
||||
that drive the level-3 blocked algorithms are the same ones that
|
||||
determine the sizes of the blocks within our memory allocator's
|
||||
memory pools. This branch should never be executed. */ \
|
||||
bl2_abort(); \
|
||||
\
|
||||
needs_alloc = FALSE; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
if ( needs_alloc ) \
|
||||
{ \
|
||||
bl2_mem_acquire_m( m_padded, \
|
||||
n_padded, \
|
||||
elem_size, \
|
||||
buf_type, \
|
||||
mem_p ); \
|
||||
} \
|
||||
} \
|
||||
|
||||
|
||||
// Allocate a mem_t object if it is unallocated, or update its dimensions
|
||||
// if it is allocated. This macro is used for vectors.
|
||||
|
||||
#define bl2_mem_alloc_update_v( m_padded, elem_size, mem_p ) \
|
||||
{ \
|
||||
bool_t needs_alloc; \
|
||||
siz_t size_needed; \
|
||||
\
|
||||
if ( bl2_mem_is_unalloc( mem_p ) ) \
|
||||
{ \
|
||||
/* If the mem_t object is currently unallocated (NULL), mark it for
|
||||
allocation. */ \
|
||||
needs_alloc = TRUE; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
/* Compute the total buffer size needed. */ \
|
||||
size_needed = m_padded * elem_size; \
|
||||
\
|
||||
if ( size_needed <= bl2_mem_size( mem_p ) ) \
|
||||
{ \
|
||||
/* If the mem_t object is currently allocated, AND what is
|
||||
allocated and available is equal to or larger than what is
|
||||
needed, then set the dimension according to how much we
|
||||
need. This allows us to avoid unnecessarily releasing and
|
||||
re-allocating when all we need is a subset of what is already
|
||||
available. This case will occur when, for example, handling
|
||||
both forward and backward edge cases. */ \
|
||||
bl2_mem_set_dims( m_padded, 1, mem_p ); \
|
||||
\
|
||||
needs_alloc = FALSE; \
|
||||
} \
|
||||
else /* if ( bl2_mem_size( mem_p ) < size_needed ) */ \
|
||||
{ \
|
||||
/* If the mem_t object is currently allocated and smaller than is
|
||||
needed, then release the memory and re-allocate. */ \
|
||||
bl2_mem_release( mem_p ); \
|
||||
\
|
||||
needs_alloc = TRUE; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
if ( needs_alloc ) \
|
||||
{ \
|
||||
bl2_mem_acquire_v( m_padded, \
|
||||
elem_size, \
|
||||
mem_p ); \
|
||||
} \
|
||||
} \
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -570,21 +570,6 @@ bl2_obj_width_stored( obj )
|
||||
}
|
||||
|
||||
|
||||
// Panel stride query
|
||||
|
||||
#define bl2_obj_panel_stride( obj ) \
|
||||
\
|
||||
((obj).ps)
|
||||
|
||||
|
||||
// Panel stride modification
|
||||
|
||||
#define bl2_obj_set_panel_stride( panel_stride, obj ) \
|
||||
{ \
|
||||
(obj).ps = panel_stride; \
|
||||
}
|
||||
|
||||
|
||||
// Offset query
|
||||
|
||||
#define bl2_obj_row_offset( obj ) \
|
||||
@@ -709,13 +694,13 @@ bl2_obj_width_stored( obj )
|
||||
}
|
||||
|
||||
|
||||
// Pack mem entry query
|
||||
// Pack mem_t entry query
|
||||
|
||||
#define bl2_obj_pack_mem( obj ) \
|
||||
\
|
||||
( &((obj).pack_mem) )
|
||||
|
||||
// Pack mem entry modification
|
||||
// Pack mem_t entry modification
|
||||
|
||||
#define bl2_obj_set_pack_mem( mem_p, obj ) \
|
||||
{ \
|
||||
@@ -723,6 +708,50 @@ bl2_obj_width_stored( obj )
|
||||
}
|
||||
|
||||
|
||||
// Packed dimensions query
|
||||
|
||||
#define bl2_obj_packed_length( obj ) \
|
||||
\
|
||||
( (obj).m_packed )
|
||||
|
||||
#define bl2_obj_packed_width( obj ) \
|
||||
\
|
||||
( (obj).n_packed )
|
||||
|
||||
// Packed dimensions modification
|
||||
|
||||
#define bl2_obj_set_packed_length( m0, obj ) \
|
||||
{ \
|
||||
(obj).m_packed = m0; \
|
||||
}
|
||||
|
||||
#define bl2_obj_set_packed_width( n0, obj ) \
|
||||
{ \
|
||||
(obj).n_packed = n0; \
|
||||
}
|
||||
|
||||
#define bl2_obj_set_packed_dims( m0, n0, obj ) \
|
||||
{ \
|
||||
bl2_obj_set_packed_length( m0, obj ); \
|
||||
bl2_obj_set_packed_width( n0, obj ); \
|
||||
}
|
||||
|
||||
|
||||
// Packed panel stride query
|
||||
|
||||
#define bl2_obj_panel_stride( obj ) \
|
||||
\
|
||||
((obj).ps)
|
||||
|
||||
// Packed panel stride modification
|
||||
|
||||
#define bl2_obj_set_panel_stride( panel_stride, obj ) \
|
||||
{ \
|
||||
(obj).ps = panel_stride; \
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
// Cast mem entry query
|
||||
|
||||
#define bl2_obj_cast_mem( obj ) \
|
||||
@@ -735,48 +764,6 @@ bl2_obj_width_stored( obj )
|
||||
{ \
|
||||
(obj).cast_mem = *mem_p; \
|
||||
}
|
||||
|
||||
/*
|
||||
// Mem entry query
|
||||
|
||||
#define bl2_mem_buffer( mem_p ) \
|
||||
\
|
||||
( (mem_p)->buf )
|
||||
|
||||
#define bl2_mem_elem_size( mem_p ) \
|
||||
\
|
||||
( (mem_p)->elem_size )
|
||||
|
||||
#define bl2_mem_length( mem_p ) \
|
||||
\
|
||||
( (mem_p)->m )
|
||||
|
||||
#define bl2_mem_width( mem_p ) \
|
||||
\
|
||||
( (mem_p)->n )
|
||||
|
||||
|
||||
// Mem entry modification
|
||||
|
||||
#define bl2_mem_set_buffer( buf0, mem_p ) \
|
||||
{ \
|
||||
mem_p->buf = buf0; \
|
||||
}
|
||||
|
||||
#define bl2_mem_set_elem_size( elem_size0, mem_p ) \
|
||||
{ \
|
||||
mem_p->elem_size = elem_size0; \
|
||||
}
|
||||
|
||||
#define bl2_mem_set_length( m0, mem_p ) \
|
||||
{ \
|
||||
mem_p->m = m0; \
|
||||
}
|
||||
|
||||
#define bl2_mem_set_width( n0, mem_p ) \
|
||||
{ \
|
||||
mem_p->n = n0; \
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
||||
@@ -275,9 +275,6 @@ typedef struct mem_s
|
||||
packbuf_t buf_type;
|
||||
pool_t* pool;
|
||||
siz_t size;
|
||||
dim_t m;
|
||||
dim_t n;
|
||||
siz_t elem_size;
|
||||
} mem_t;
|
||||
|
||||
// Blocksize object type
|
||||
@@ -376,9 +373,12 @@ typedef struct obj_s
|
||||
|
||||
// Pack-related fields
|
||||
mem_t pack_mem; // cached memory region for packing
|
||||
//mem_t cast_mem; // cached memory region for casting
|
||||
dim_t m_packed;
|
||||
dim_t n_packed;
|
||||
inc_t ps; // panel stride (distance to next panel)
|
||||
|
||||
//mem_t cast_mem; // cached memory region for casting
|
||||
|
||||
} obj_t;
|
||||
|
||||
|
||||
@@ -402,10 +402,10 @@ typedef struct obj_s
|
||||
(b).cs = (a).cs; \
|
||||
\
|
||||
/* We must NOT copy pack_mem field since this macro forms the basis of
|
||||
bl2_obj_alias_to(), which is used in packm. There, we want to copy
|
||||
over the basic fields of the obj_t but PRESERVE the pack_mem field
|
||||
of the destination object since it holds the cached mem_t buffer
|
||||
(and dimensions). */ \
|
||||
bl2_obj_alias_to(), which is used in packm_init(). There, we want to
|
||||
copy the basic fields of the obj_t but PRESERVE the pack_mem field
|
||||
(and the corresponding dimensions and stride) of the destination
|
||||
object since it holds the cached mem_t object and buffer. */ \
|
||||
}
|
||||
|
||||
#define bl2_obj_init_subpart_from( a, b ) \
|
||||
@@ -428,10 +428,13 @@ typedef struct obj_s
|
||||
/* We want to copy the pack_mem field here because this macro is used
|
||||
when creating subpartitions, including those of packed objects. In
|
||||
those situations, we want the subpartition to inherit the pack_mem
|
||||
field of its parent. */ \
|
||||
field, and the corresponding packed dimensions, of its parent. */ \
|
||||
(b).pack_mem = (a).pack_mem; \
|
||||
/*(b).cast_mem = (a).cast_mem;*/ \
|
||||
(b).m_packed = (a).m_packed; \
|
||||
(b).n_packed = (a).n_packed; \
|
||||
(b).ps = (a).ps; \
|
||||
\
|
||||
/*(b).cast_mem = (a).cast_mem;*/ \
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user