mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Enhanced tracking of dimensions for mem_t objects.
Details:
- Added new fields to the mem_t struct definition to track the allocated (as opposed to the currently used) dimensions of the memory region. This allows packm_init() to be more robust in situations where memory is already allocated but is larger than needed for the current packing job.
- Updated the logic in the bl2_obj_set_buffer_with_cached_packm_mem() macro, used in packm_init(), to update the "currently used" dimensions of the mem_t object if the requested dimensions are smaller than or equal to the allocated dimensions.
This commit is contained in:
@@ -81,6 +81,7 @@ void bl2_mm_acquire_v( num_t dt,
|
||||
|
||||
bl2_mem_set_buffer( buf, mem );
|
||||
|
||||
bl2_mem_set_dims_alloc( m, 1, mem );
|
||||
bl2_mem_set_dims( m, 1, mem );
|
||||
}
|
||||
|
||||
@@ -115,6 +116,7 @@ void bl2_mm_acquire_m( num_t dt,
|
||||
|
||||
bl2_mem_set_buffer( buf, mem );
|
||||
|
||||
bl2_mem_set_dims_alloc( m, n, mem );
|
||||
bl2_mem_set_dims( m, n, mem );
|
||||
}
|
||||
|
||||
@@ -132,6 +134,7 @@ void bl2_mm_release( mem_t* mem )
|
||||
bl2_mem_set_buffer( NULL, mem );
|
||||
|
||||
// Set the dimensions to zero (just because we are polite).
|
||||
bl2_mem_set_dims_alloc( 0, 0, mem );
|
||||
bl2_mem_set_dims( 0, 0, mem );
|
||||
}
|
||||
|
||||
|
||||
@@ -464,7 +464,7 @@ void bl2_obj_print( char* label, obj_t* obj )
|
||||
{
|
||||
FILE* file = stdout;
|
||||
mem_t* pack_mem = bl2_obj_pack_mem( *obj );
|
||||
mem_t* cast_mem = bl2_obj_cast_mem( *obj );
|
||||
//mem_t* cast_mem = bl2_obj_cast_mem( *obj );
|
||||
|
||||
if ( bl2_error_checking_is_enabled() )
|
||||
bl2_obj_print_check( label, obj );
|
||||
@@ -486,14 +486,16 @@ void bl2_obj_print( char* label, obj_t* obj )
|
||||
fprintf( file, " elem size %lu\n", bl2_obj_elem_size( *obj ) );
|
||||
fprintf( file, " rs, cs %lu, %lu\n", bl2_obj_row_stride( *obj ),
|
||||
bl2_obj_col_stride( *obj ) );
|
||||
fprintf( file, " cast_mem \n" );
|
||||
fprintf( file, " - buf %p\n", bl2_mem_buffer( cast_mem ) );
|
||||
fprintf( file, " - m %lu\n", bl2_mem_length( cast_mem ) );
|
||||
fprintf( file, " - n %lu\n", bl2_mem_width( cast_mem ) );
|
||||
//fprintf( file, " cast_mem \n" );
|
||||
//fprintf( file, " - buf %p\n", bl2_mem_buffer( cast_mem ) );
|
||||
//fprintf( file, " - m %lu\n", bl2_mem_length( cast_mem ) );
|
||||
//fprintf( file, " - n %lu\n", bl2_mem_width( cast_mem ) );
|
||||
fprintf( file, " pack_mem \n" );
|
||||
fprintf( file, " - buf %p\n", bl2_mem_buffer( pack_mem ) );
|
||||
fprintf( file, " - m %lu\n", bl2_mem_length( pack_mem ) );
|
||||
fprintf( file, " - n %lu\n", bl2_mem_width( pack_mem ) );
|
||||
fprintf( file, " - m allocated %lu\n", bl2_mem_length_alloc( pack_mem ) );
|
||||
fprintf( file, " - n allocated %lu\n", bl2_mem_width_alloc( pack_mem ) );
|
||||
fprintf( file, " - m used %lu\n", bl2_mem_length( pack_mem ) );
|
||||
fprintf( file, " - n used %lu\n", bl2_mem_width( pack_mem ) );
|
||||
fprintf( file, " ps %lu\n", bl2_obj_panel_stride( *obj ) );
|
||||
fprintf( file, "\n" );
|
||||
|
||||
|
||||
@@ -50,6 +50,14 @@
|
||||
\
|
||||
( (mem_p)->n )
|
||||
|
||||
/* Query the allocated length of a mem_t: evaluates to the m_alloc field of
   the object that mem_p points to. */
#define bl2_mem_length_alloc( mem_p ) ( (mem_p)->m_alloc )
|
||||
|
||||
/* Query the allocated width of a mem_t: evaluates to the n_alloc field of
   the object that mem_p points to. */
#define bl2_mem_width_alloc( mem_p ) ( (mem_p)->n_alloc )
|
||||
|
||||
/* Evaluates to nonzero when the buffer reported by bl2_mem_buffer() for
   mem_p is not NULL, i.e. the mem_t currently has a buffer attached. */
#define bl2_mem_is_alloc( mem_p ) ( bl2_mem_buffer( mem_p ) != NULL )
|
||||
@@ -82,4 +90,20 @@
|
||||
bl2_mem_set_width( n0, mem_p ); \
|
||||
}
|
||||
|
||||
/* Store m0 as the allocated length (m_alloc field) of the mem_t that mem_p
   points to.

   Both arguments are parenthesized in the expansion so that an expression
   argument keeps its intended grouping; without this, passing &obj would
   expand to &obj->m_alloc, which does not name the field being assigned.

   The expansion remains a brace-enclosed block (not do/while(0)) so that
   existing call sites, with or without a trailing semicolon, keep working. */
#define bl2_mem_set_length_alloc( m0, mem_p ) \
{ \
	(mem_p)->m_alloc = (m0); \
}
|
||||
|
||||
/* Store n0 as the allocated width (n_alloc field) of the mem_t that mem_p
   points to.

   Both arguments are parenthesized in the expansion so that an expression
   argument keeps its intended grouping; without this, passing &obj would
   expand to &obj->n_alloc, which does not name the field being assigned.

   The expansion remains a brace-enclosed block (not do/while(0)) so that
   existing call sites, with or without a trailing semicolon, keep working. */
#define bl2_mem_set_width_alloc( n0, mem_p ) \
{ \
	(mem_p)->n_alloc = (n0); \
}
|
||||
|
||||
/* Set both allocated dimensions of the mem_t that mem_p points to, by
   delegating to bl2_mem_set_length_alloc() (with m0) and
   bl2_mem_set_width_alloc() (with n0).

   Each argument is wrapped in parentheses before being forwarded, so an
   expression argument such as &obj reaches the helper macros as a single
   operand regardless of how they use it in their own expansions.

   The expansion remains a brace-enclosed block so that existing call sites
   keep working unchanged. */
#define bl2_mem_set_dims_alloc( m0, n0, mem_p ) \
{ \
	bl2_mem_set_length_alloc( (m0), (mem_p) ); \
	bl2_mem_set_width_alloc( (n0), (mem_p) ); \
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -860,20 +860,31 @@ bl2_obj_width_stored( obj )
|
||||
m_needed = bl2_align_dim( m, mult_m, elem_size ); \
|
||||
n_needed = bl2_align_dim( n, mult_n, elem_size ); \
|
||||
\
|
||||
/* If the pack_mem buffer is NULL, or if it is non-NULL but currently too
|
||||
small for the matrix represented by obj, we set a flag to allocate a
|
||||
memory region. (In the latter case, we first release the previous
|
||||
buffer.) If neither of those conditions is met, then we can assume
|
||||
the buffer exists and is sufficiently large. */ \
|
||||
if ( bl2_mem_buffer( mem_p ) == NULL ) \
|
||||
if ( bl2_mem_is_unalloc( mem_p ) ) \
|
||||
{ \
|
||||
/* If the mem_t object is currently unallocated (NULL), mark it for
|
||||
allocation. */ \
|
||||
needs_alloc = TRUE; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if ( bl2_mem_length( mem_p ) < m_needed || \
|
||||
bl2_mem_width( mem_p ) < n_needed ) \
|
||||
if ( m_needed <= bl2_mem_length_alloc( mem_p ) && \
|
||||
n_needed <= bl2_mem_width_alloc( mem_p ) ) \
|
||||
{ \
|
||||
/* If the mem_t object is currently allocated, AND what is
|
||||
allocated and available is equal to or larger than what is
|
||||
needed, then set the dimensions according to how much we
|
||||
need. This allows us to avoid unnecessarily releasing and
|
||||
re-allocating when all we need is a subset of what is already
|
||||
available. */ \
|
||||
bl2_mem_set_dims( m_needed, n_needed, mem_p ); \
|
||||
needs_alloc = FALSE; \
|
||||
} \
|
||||
else if ( bl2_mem_length_alloc( mem_p ) < m_needed || \
|
||||
bl2_mem_width_alloc( mem_p ) < n_needed ) \
|
||||
{ \
|
||||
/* If the mem_t object is currently allocated and smaller than is
|
||||
needed, release the memory and mark for re-allocation. */ \
|
||||
bl2_mm_release( mem_p ); \
|
||||
needs_alloc = TRUE; \
|
||||
} \
|
||||
@@ -888,8 +899,8 @@ bl2_obj_width_stored( obj )
|
||||
bl2_mm_acquire_m( dt, m_needed, n_needed, mem_p ); \
|
||||
} \
|
||||
\
|
||||
/* Grab the buffer from the mem_t object and copy it to the main object
|
||||
buffer. */ \
|
||||
/* Grab the buffer address from the mem_t object and copy it to the main
|
||||
object buffer. */ \
|
||||
buf = bl2_mem_buffer( mem_p ); \
|
||||
bl2_obj_set_buffer( buf, obj ); \
|
||||
} \
|
||||
@@ -914,12 +925,27 @@ bl2_obj_width_stored( obj )
|
||||
\
|
||||
if ( bl2_mem_is_unalloc( mem_p ) ) \
|
||||
{ \
|
||||
/* If the mem_t object is currently unallocated (NULL), mark it for
|
||||
allocation. */ \
|
||||
needs_alloc = TRUE; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if ( bl2_mem_length( mem_p ) < m_needed ) \
|
||||
if ( m_needed <= bl2_mem_length_alloc( mem_p ) ) \
|
||||
{ \
|
||||
/* If the mem_t object is currently allocated, AND what is
|
||||
allocated and available is equal to or larger than what is
|
||||
needed, then set the dimension according to how much we
|
||||
need. This allows us to avoid unnecessarily releasing and
|
||||
re-allocating when all we need is a subset of what is already
|
||||
available. */ \
|
||||
bl2_mem_set_dims( m_needed, 1, mem_p ); \
|
||||
needs_alloc = FALSE; \
|
||||
} \
|
||||
if ( bl2_mem_length_alloc( mem_p ) < m_needed ) \
|
||||
{ \
|
||||
/* If the mem_t object is currently allocated and smaller than is
|
||||
needed, release the memory and mark for re-allocation. */ \
|
||||
bl2_mm_release( mem_p ); \
|
||||
needs_alloc = TRUE; \
|
||||
} \
|
||||
@@ -934,8 +960,8 @@ bl2_obj_width_stored( obj )
|
||||
bl2_mm_acquire_v( dt, m_needed, mem_p ); \
|
||||
} \
|
||||
\
|
||||
/* Grab the buffer from the mem_t object and copy it to the main object
|
||||
buffer. */ \
|
||||
/* Grab the buffer address from the mem_t object and copy it to the main
|
||||
object buffer. */ \
|
||||
buf = bl2_mem_buffer( mem_p ); \
|
||||
bl2_obj_set_buffer( buf, obj ); \
|
||||
} \
|
||||
|
||||
@@ -69,6 +69,8 @@ typedef struct mem_s
|
||||
void* buf;
|
||||
dim_t m;
|
||||
dim_t n;
|
||||
dim_t m_alloc;
|
||||
dim_t n_alloc;
|
||||
} mem_t;
|
||||
|
||||
// Blocksize object type
|
||||
|
||||
Reference in New Issue
Block a user