Added option to disable pack buffer memory pools.

Details:
- Added a new configure option, --[en|dis]able-packbuf-pools, which will
  enable or disable the use of internal memory pools for managing buffers
  used for packing. When disabled, the function specified by the cpp
  macro BLIS_MALLOC_POOL is called whenever a packing buffer is needed
  (and BLIS_FREE_POOL is called when the buffer is ready to be released,
  usually at the end of a loop). When enabled, which was the status quo
  prior to this commit, a memory pool data structure is created and
  managed to provide threads with packing buffers. The memory pool
  minimizes calls to bli_malloc_pool() (i.e., the wrapper that calls
  BLIS_MALLOC_POOL), but does so through a somewhat more complex
  mechanism that may incur additional overhead in some (but not all)
  situations. The new option defaults to --enable-packbuf-pools.
- Removed the reinitialization of the memory pools from the level-3
  front-ends and replaced it with automatic reinitialization within the
  pool API's implementation. This required an extra argument to
  bli_pool_checkout_block() in the form of a requested size, but hides
  the complexity entirely from BLIS. And since bli_pool_checkout_block()
  is only ever called within a critical section, this change fixes a
  potential race condition in which threads using contexts with different
  cache blocksizes--most likely a heterogeneous environment--can check
  out pool blocks that are too small for the submatrices it wishes to
  pack. Thanks to Nisanth Padinharepatt for reporting this potential
  issue.
- Removed several functions in light of the relocation of pool reinit,
  including bli_membrk_reinit_pools(), bli_memsys_reinit(),
  bli_pool_reinit_if(), and bli_check_requested_block_size_for_pool().
- Updated the testsuite to print whether the memory pools are enabled or
  disabled.
This commit is contained in:
Field G. Van Zee
2017-12-21 19:22:57 -06:00
parent 107801aaae
commit 9804adfd40
25 changed files with 85 additions and 177 deletions

View File

@@ -52,6 +52,10 @@
#define BLIS_ENABLE_PTHREADS
#endif
#if @enable_packbuf_pools@
#define BLIS_ENABLE_PACKBUF_POOLS
#endif
#if @int_type_size@ == 64
#define BLIS_INT_TYPE_SIZE 64
#elif @int_type_size@ == 32

30
configure vendored
View File

@@ -99,6 +99,21 @@ print_usage()
echo " --disable-threading is specified, threading will be"
echo " disabled. The default is 'no'."
echo " "
echo " --disable-packbuf-pools, --enable-packbuf-pools"
echo " "
echo " Disable (enabled by default) use of internal memory"
echo " pools for managing packing buffers. When disabled,"
echo " the function specified by BLIS_MALLOC_POOL is called"
echo " on-demand, whenever a packing buffer is needed, and"
echo " the buffer is released via the function specified by"
echo " BLIS_FREE_POOL() when the loop in which it was"
echo " allocated terminates. When enabled, the memory pools"
echo " minimize calls to both BLIS_MALLOC_POOL() and"
echo " BLIS_FREE_POOL(), especially in a multithreaded"
echo " environment, but does so through a mechanism that may"
echo " incur additional overhead in some (but not all)"
echo " situations."
echo " "
echo " -q, --quiet Suppress informational output. By default, configure"
echo " is verbose. (NOTE: -q is not yet implemented)"
echo " "
@@ -502,6 +517,7 @@ main()
enable_verbose='no'
enable_static='yes'
enable_shared='no'
enable_packbuf_pools='yes'
int_type_size=0
blas2blis_int_type_size=32
enable_blas2blis='yes'
@@ -582,6 +598,12 @@ main()
disable-threading)
threading_model='no'
;;
enable-packbuf-pools)
enable_packbuf_pools='yes'
;;
disable-packbuf-pools)
enable_packbuf_pools='no'
;;
int-size=*)
int_type_size=${OPTARG#*=}
;;
@@ -1029,6 +1051,13 @@ main()
fi
# Convert 'yes' and 'no' flags to booleans.
if [ "x${enable_packbuf_pools}" = "xyes" ]; then
echo "${script_name}: internal memory pools for packing buffers are enabled."
enable_packbuf_pools_01=1
else
echo "${script_name}: internal memory pools for packing buffers are disabled."
enable_packbuf_pools_01=0
fi
if [ "x${enable_blas2blis}" = "xyes" ]; then
echo "${script_name}: the BLAS compatibility layer is enabled."
enable_blas2blis_01=1
@@ -1135,6 +1164,7 @@ main()
| perl -pe "s/\@kernel_list_defines\@/${kernel_list_defines}/g" \
| sed -e "s/@enable_openmp@/${enable_openmp_01}/g" \
| sed -e "s/@enable_pthreads@/${enable_pthreads_01}/g" \
| sed -e "s/@enable_packbuf_pools@/${enable_packbuf_pools_01}/g" \
| sed -e "s/@int_type_size@/${int_type_size}/g" \
| sed -e "s/@blas2blis_int_type_size@/${blas2blis_int_type_size}/g" \
| sed -e "s/@enable_blas2blis@/${enable_blas2blis_01}/g" \

View File

@@ -62,10 +62,6 @@ void bli_gemm_front
return;
}
// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );
// Alias A, B, and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );

View File

@@ -63,10 +63,6 @@ void bli_hemm_front
return;
}
// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );
// Alias A, B, and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );

View File

@@ -67,10 +67,6 @@ void bli_her2k_front
return;
}
// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );
// Alias A, B, and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );

View File

@@ -63,10 +63,6 @@ void bli_herk_front
return;
}
// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );
// Alias A and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *c, c_local );

View File

@@ -63,10 +63,6 @@ void bli_symm_front
return;
}
// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );
// Alias A, B, and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );

View File

@@ -64,10 +64,6 @@ void bli_syr2k_front
return;
}
// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );
// Alias A, B, and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );

View File

@@ -61,10 +61,6 @@ void bli_syrk_front
return;
}
// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );
// Alias A and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *c, c_local );

View File

@@ -61,10 +61,6 @@ void bli_trmm_front
return;
}
// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );
// Alias A and B so we can tweak the objects if necessary.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );

View File

@@ -63,10 +63,6 @@ void bli_trmm3_front
return;
}
// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );
// Alias A, B, and C so we can tweak the objects if necessary.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );

View File

@@ -61,10 +61,6 @@ void bli_trsm_front
return;
}
// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );
// Alias A and B so we can tweak the objects if necessary.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );

View File

@@ -757,16 +757,6 @@ err_t bli_check_valid_packbuf( packbuf_t buf_type )
return e_val;
}
err_t bli_check_requested_block_size_for_pool( siz_t req_size, pool_t* pool )
{
err_t e_val = BLIS_SUCCESS;
if ( bli_pool_block_size( pool ) < req_size )
e_val = BLIS_REQUESTED_CONTIG_BLOCK_TOO_BIG;
return e_val;
}
err_t bli_check_if_exhausted_pool( pool_t* pool )
{
err_t e_val = BLIS_SUCCESS;

View File

@@ -97,7 +97,6 @@ err_t bli_check_packv_schema_on_unpack( obj_t* a );
err_t bli_check_object_buffer( obj_t* a );
err_t bli_check_valid_packbuf( packbuf_t buf_type );
err_t bli_check_requested_block_size_for_pool( siz_t req_size, pool_t* pool );
err_t bli_check_if_exhausted_pool( pool_t* pool );
err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx );
err_t bli_check_alignment_is_power_of_two( size_t align_size );

View File

@@ -149,8 +149,6 @@ void bli_error_init_msgs( void )
sprintf( bli_error_string_for_code(BLIS_INVALID_PACKBUF),
"Invalid packbuf_t value." );
sprintf( bli_error_string_for_code(BLIS_REQUESTED_CONTIG_BLOCK_TOO_BIG ),
"Attempted to allocate contiguous memory block that is too big for implementation." );
sprintf( bli_error_string_for_code(BLIS_EXHAUSTED_CONTIG_MEMORY_POOL),
"Attempted to allocate more memory from contiguous pool than is available." );
sprintf( bli_error_string_for_code(BLIS_INSUFFICIENT_STACK_BUF_SIZE),

View File

@@ -86,6 +86,14 @@ gint_t bli_info_get_enable_cblas( void )
#endif
}
gint_t bli_info_get_blas2blis_int_type_size( void ) { return BLIS_BLAS2BLIS_INT_TYPE_SIZE; }
gint_t bli_info_get_enable_packbuf_pools( void )
{
#ifdef BLIS_ENABLE_PACKBUF_POOLS
return 1;
#else
return 0;
#endif
}

View File

@@ -57,6 +57,7 @@ gint_t bli_info_get_enable_stay_auto_init( void );
gint_t bli_info_get_enable_blas2blis( void );
gint_t bli_info_get_enable_cblas( void );
gint_t bli_info_get_blas2blis_int_type_size( void );
gint_t bli_info_get_enable_packbuf_pools( void );
// -- Kernel implementation-related --------------------------------------------

View File

@@ -42,7 +42,9 @@ void bli_membrk_init
)
{
bli_mutex_init( bli_membrk_mutex( membrk ) );
#ifdef BLIS_ENABLE_PACKBUF_POOLS
bli_membrk_init_pools( cntx, membrk );
#endif
bli_membrk_set_malloc_fp( bli_malloc_pool, membrk );
bli_membrk_set_free_fp( bli_free_pool, membrk );
}
@@ -53,7 +55,10 @@ void bli_membrk_finalize
)
{
bli_membrk_set_malloc_fp( NULL, membrk );
bli_membrk_set_free_fp( NULL, membrk );
#ifdef BLIS_ENABLE_PACKBUF_POOLS
bli_membrk_finalize_pools( membrk );
#endif
bli_mutex_finalize( bli_membrk_mutex( membrk ) );
}
@@ -70,6 +75,13 @@ void bli_membrk_acquire_m
dim_t pi;
siz_t block_size;
// If the internal memory pools for pack buffers are disabled, we
// spoof the buffer type as BLIS_BUFFER_FOR_GEN_USE to induce the
// immediate usage of bli_membrk_malloc().
#ifndef BLIS_ENABLE_PACKBUF_POOLS
buf_type = BLIS_BUFFER_FOR_GEN_USE;
#endif
// Make sure the API is initialized.
//assert( membrk ); //??
@@ -86,11 +98,12 @@ void bli_membrk_acquire_m
// - the buffer type (a packbuf_t value),
// - the size of the requested region,
// - the membrk_t from which the mem_t entry was acquired.
// NOTE: We do not initialize the pool field since this block did not
// NOTE: We initialize the pool field to NULL since this block did not
// come from a memory pool.
bli_mem_set_buffer( buf_sys, mem );
bli_mem_set_buf_sys( buf_sys, mem );
bli_mem_set_buf_type( buf_type, mem );
bli_mem_set_pool( NULL, mem );
bli_mem_set_size( req_size, mem );
bli_mem_set_membrk( membrk, mem );
}
@@ -105,17 +118,6 @@ void bli_membrk_acquire_m
pi = bli_packbuf_index( buf_type );
pool = bli_membrk_pool( pi, membrk );
// Unconditionally perform error checking on the memory pool.
{
err_t e_val;
// Make sure that the requested matrix size fits inside of a block
// of the corresponding pool. If it does not, the pool was somehow
// initialized improperly.
e_val = bli_check_requested_block_size_for_pool( req_size, pool );
bli_check_error_code( e_val );
}
// Extract the address of the pblk_t struct within the mem_t.
pblk = bli_mem_pblk( mem );
@@ -123,19 +125,21 @@ void bli_membrk_acquire_m
bli_membrk_lock( membrk );
{
// Checkout a block from the pool. If the pool is exhausted,
// Checkout a block from the pool. If the pool's blocks are too
// small, it will be reinitialized with blocks large enough to
// accommodate the requested block size. If the pool is exhausted,
// either because it is still empty or because all blocks have
// been checked out already, additional blocks will be allocated
// automatically, as-needed. Note that the addresses are stored
// directly into the mem_t struct since pblk is the address of
// the struct's pblk_t field.
bli_pool_checkout_block( pblk, pool );
bli_pool_checkout_block( req_size, pblk, pool );
// Query the size of the blocks in the pool so we can store it in
// the mem_t object. At this point, it is guaranteed to be at
// least as large as req_size. (NOTE: We must perform the query
// within the critical section to ensure that the pool hasn't
// changed, as unlikely as that would be.)
// changed.)
block_size = bli_pool_block_size( pool );
}
@@ -329,49 +333,6 @@ void bli_membrk_init_pools
bli_pool_init( num_blocks_c, block_size_c, align_size, pool_c );
}
void bli_membrk_reinit_pools
(
cntx_t* cntx,
membrk_t* membrk
)
{
// Map each of the packbuf_t values to an index starting at zero.
const dim_t index_a = bli_packbuf_index( BLIS_BUFFER_FOR_A_BLOCK );
const dim_t index_b = bli_packbuf_index( BLIS_BUFFER_FOR_B_PANEL );
const dim_t index_c = bli_packbuf_index( BLIS_BUFFER_FOR_C_PANEL );
const siz_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE;
// Alias the pool addresses to convenient identifiers.
pool_t* pool_a = bli_membrk_pool( index_a, membrk );
pool_t* pool_b = bli_membrk_pool( index_b, membrk );
pool_t* pool_c = bli_membrk_pool( index_c, membrk );
// Query the number of blocks currently allocated in each pool.
const dim_t num_blocks_a = bli_pool_num_blocks( pool_a );
const dim_t num_blocks_b = bli_pool_num_blocks( pool_b );
const dim_t num_blocks_c = bli_pool_num_blocks( pool_c );
siz_t block_size_a_new = 0;
siz_t block_size_b_new = 0;
siz_t block_size_c_new = 0;
// Determine the context-implied block size needed for each pool.
bli_membrk_compute_pool_block_sizes( &block_size_a_new,
&block_size_b_new,
&block_size_c_new,
cntx );
// Reinitialize the pool, but only if one of the parameters has
// changed in such a way that reinitialization would be required.
// In this case, the align_size is constant, as is num_blocks, so
// what this actually boils down to is that reinitialization of a
// pool occurs only if the block size for that pool has increased.
bli_pool_reinit_if( num_blocks_a, block_size_a_new, align_size, pool_a );
bli_pool_reinit_if( num_blocks_b, block_size_b_new, align_size, pool_b );
bli_pool_reinit_if( num_blocks_c, block_size_c_new, align_size, pool_c );
}
void bli_membrk_finalize_pools
(
membrk_t* membrk

View File

@@ -140,11 +140,6 @@ void bli_membrk_init_pools
cntx_t* cntx,
membrk_t* membrk
);
void bli_membrk_reinit_pools
(
cntx_t* cntx,
membrk_t* membrk
);
void bli_membrk_finalize_pools
(
membrk_t* membrk

View File

@@ -80,27 +80,6 @@ void bli_memsys_init( void )
#endif
}
void bli_memsys_reinit( cntx_t* cntx )
{
#ifdef BLIS_ENABLE_OPENMP
_Pragma( "omp critical (mem)" )
#endif
#ifdef BLIS_ENABLE_PTHREADS
pthread_mutex_lock( &memsys_mutex );
#endif
// BEGIN CRITICAL SECTION
{
// Reinitialize the global membrk_t object's memory pools.
bli_membrk_reinit_pools( cntx, &global_membrk );
}
// END CRITICAL SECTION
#ifdef BLIS_ENABLE_PTHREADS
pthread_mutex_unlock( &memsys_mutex );
#endif
}
void bli_memsys_finalize( void )
{
#ifdef BLIS_ENABLE_OPENMP

View File

@@ -43,7 +43,6 @@ membrk_t* bli_memsys_global_membrk( void );
// -----------------------------------------------------------------------------
void bli_memsys_init( void );
void bli_memsys_reinit( cntx_t* cntx );
void bli_memsys_finalize( void );

View File

@@ -90,6 +90,9 @@ void bli_pool_finalize( pool_t* pool )
// Free the block_ptrs array.
bli_free_intl( block_ptrs );
// This explicit clearing of the pool_t struct is not strictly
// necessary and so it has been commented out.
#if 0
// Clear the contents of the pool_t struct.
bli_pool_set_block_ptrs( NULL, pool );
bli_pool_set_block_ptrs_len( 0, pool );
@@ -97,6 +100,7 @@ void bli_pool_finalize( pool_t* pool )
bli_pool_set_top_index( 0, pool );
bli_pool_set_block_size( 0, pool );
bli_pool_set_align_size( 0, pool );
#endif
}
void bli_pool_reinit( dim_t num_blocks_new,
@@ -116,43 +120,25 @@ void bli_pool_reinit( dim_t num_blocks_new,
bli_pool_init( num_blocks_new, block_size_new, align_size_new, pool );
}
void bli_pool_reinit_if( dim_t num_blocks_new,
siz_t block_size_new,
siz_t align_size_new,
pool_t* pool )
{
const dim_t num_blocks = bli_pool_num_blocks( pool );
const dim_t block_size = bli_pool_block_size( pool );
const dim_t align_size = bli_pool_align_size( pool );
// Reinitialize the pool, but only if one or more of new pool
// parameters would require it. Otherwise, if only the number
// of blocks has increased, we can skip a full reinit and just
// grow the pool.
if ( block_size_new > block_size ||
align_size_new != align_size )
{
// Reinitialize the pool with the new parameters, in particular,
// the new block size.
bli_pool_reinit( num_blocks_new,
block_size_new,
align_size_new,
pool );
}
else if ( num_blocks_new > num_blocks )
{
const dim_t num_blocks_add = num_blocks_new -
num_blocks;
bli_pool_grow( num_blocks_add, pool );
}
}
void bli_pool_checkout_block( pblk_t* block, pool_t* pool )
void bli_pool_checkout_block( siz_t req_size, pblk_t* block, pool_t* pool )
{
pblk_t* block_ptrs;
dim_t top_index;
if ( bli_pool_block_size( pool ) < req_size )
{
const dim_t num_blocks_new = bli_pool_num_blocks( pool );
const siz_t align_size_new = bli_pool_align_size( pool );
// If the requested block size is smaller than what the pool
// was initialized with, reinitialize the pool to contain blocks
// of the requested size.
bli_pool_reinit( num_blocks_new,
req_size,
align_size_new,
pool );
}
// If the pool is exhausted, add a block.
if ( bli_pool_is_exhausted( pool ) )
{

View File

@@ -173,12 +173,8 @@ void bli_pool_reinit( dim_t num_blocks_new,
siz_t block_size_new,
siz_t align_size_new,
pool_t* pool );
void bli_pool_reinit_if( dim_t num_blocks_new,
siz_t block_size_new,
siz_t align_size_new,
pool_t* pool );
void bli_pool_checkout_block( pblk_t* block, pool_t* pool );
void bli_pool_checkout_block( siz_t req_size, pblk_t* block, pool_t* pool );
void bli_pool_checkin_block( pblk_t* block, pool_t* pool );
void bli_pool_grow( dim_t num_blocks_add, pool_t* pool );

View File

@@ -1206,7 +1206,6 @@ typedef enum
// Memory allocator errors
BLIS_INVALID_PACKBUF = (-120),
BLIS_REQUESTED_CONTIG_BLOCK_TOO_BIG = (-121),
BLIS_EXHAUSTED_CONTIG_MEMORY_POOL = (-122),
BLIS_INSUFFICIENT_STACK_BUF_SIZE = (-123),
BLIS_ALIGNMENT_NOT_POWER_OF_TWO = (-124),

View File

@@ -629,6 +629,9 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params )
libblis_test_fprintf_c( os, "Max stack buffer size (bytes) %d\n", ( int )bli_info_get_stack_buf_max_size() );
libblis_test_fprintf_c( os, "Page size (bytes) %d\n", ( int )bli_info_get_page_size() );
libblis_test_fprintf_c( os, "\n" );
libblis_test_fprintf_c( os, "memory pools for pack buffers\n" );
libblis_test_fprintf_c( os, " enabled? %d\n", ( int )bli_info_get_enable_packbuf_pools() );
libblis_test_fprintf_c( os, "\n" );
libblis_test_fprintf_c( os, "memory alignment (bytes) \n" );
libblis_test_fprintf_c( os, " stack address %d\n", ( int )bli_info_get_stack_buf_align_size() );
libblis_test_fprintf_c( os, " obj_t address %d\n", ( int )bli_info_get_heap_addr_align_size() );