Files
blis/frame/base/bli_array.c
Field G. Van Zee 2f3174330f Implemented a pool-based small block allocator.
Details:
- Implemented a sophisticated data structure and set of APIs that track
  the small blocks of memory (around 80-100 bytes each) used when
  creating nodes for control and thread trees (cntl_t and thrinfo_t) as
  well as thread communicators (thrcomm_t). The purpose of the small
  block allocator, or sba, is to allow the library to transition into a
  runtime state in which it does not perform any calls to malloc() or
  free() during normal execution of level-3 operations, regardless of
  the threading environment (potentially multiple application threads
  as well as multiple BLIS threads). The functionality relies on a new
  data structure, apool_t, which is (roughly speaking) a pool of
  arrays, where each array element is a pool of small blocks. The outer
  pool, which is protected by a mutex, provides separate arrays for each
  application thread while the arrays each handle multiple BLIS threads
  for any given application thread. The design minimizes the potential
  for lock contention, as only concurrent application threads would
  need to fight for the apool_t lock, and only if they happen to begin
  their level-3 operations at precisely the same time. Thanks to Kiran
  Varaganti and AMD for requesting this feature.
- Added a configure option to disable the sba pools, which are enabled
  by default; renamed the --[dis|en]able-packbuf-pools option to
  --[dis|en]able-pba-pools; and rewrote the --help text associated with
  this new option and consolidated it with the --help text for the
  option associated with the sba (--[dis|en]able-sba-pools).
- Moved the membrk field from the cntx_t to the rntm_t. We now pass in
  a rntm_t* to the bli_membrk_acquire() and _release() APIs, just as we
  do for bli_sba_acquire() and _release().
- Replaced all calls to bli_malloc_intl() and bli_free_intl() that are
  used for small blocks with calls to bli_sba_acquire(), which takes a
  rntm (in addition to the bytes requested), and bli_sba_release().
  These latter two functions reduce to the former two when the sba pools
  are disabled at configure-time.
- Added rntm_t* arguments to various cntl_t and thrinfo_t functions, as
  required by the new usage of bli_sba_acquire() and _release().
- Moved the freeing of "old" blocks (those allocated prior to a change
  in the block_size) from bli_membrk_acquire_m() to the implementation
  of the pool_t checkout function.
- Miscellaneous improvements to the pool_t API.
- Added a block_size field to the pblk_t.
- Harmonized the way that the trsm_ukr testsuite module performs packing
  relative to that of gemmtrsm_ukr, in part to avoid the need to create
  a packm control tree node, which now requires a rntm_t that has been
  initialized with an sba and membrk.
- Re-enable explicit call bli_finalize() in testsuite so that users who
  run the testsuite with memory tracing enabled can check for memory
  leaks.
- Manually imported the compact/minor changes from 61441b24 that cause
  the rntm to be copied locally when it is passed in via one of the
  expert APIs.
- Reordered parameters to various bli_thrcomm_*() functions so that the
  thrcomm_t* to the comm being modified is last, not first.
- Added more descriptive tracing for allocating/freeing small blocks and
  formalized via a new configure option: --[dis|en]able-mem-tracing.
- Moved some unused scalm code and headers into frame/1m/other.
- Whitespace changes to bli_pthread.c.
- Regenerated build/libblis-symbols.def.
2018-12-25 19:35:01 -06:00

205 lines
6.5 KiB
C

/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2018, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
//#define BLIS_ENABLE_MEM_TRACING
void bli_array_init
(
const siz_t num_elem,
const siz_t elem_size,
array_t* restrict array
)
{
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_array_init(): allocating array [%d * %d]: ",
( int )num_elem, ( int )elem_size );
#endif
// Compute the total size (in bytes) of the array.
const size_t array_size = num_elem * elem_size;
// Allocate the array buffer.
void* restrict buf = bli_malloc_intl( array_size );
// Initialize the array elements to zero. THIS IS IMPORANT because
// consumer threads will use the NULL-ness of the array elements to
// determine if the corresponding block (data structure) needs to be
// created/allocated and initialized.
memset( buf, 0, array_size );
// Initialize the array_t structure.
bli_array_set_buf( buf, array );
bli_array_set_num_elem( num_elem, array );
bli_array_set_elem_size( elem_size, array );
}
void bli_array_resize
(
const siz_t num_elem_new,
array_t* restrict array
)
{
// Query the number of elements in the array.
const siz_t num_elem_prev = bli_array_num_elem( array );
// If the new requested size (number of elements) is less than or equal to
// the current size, no action is needed; return early.
if ( num_elem_new <= num_elem_prev ) return;
// At this point, we know that num_elem_prev < num_elem_new, which means
// we need to proceed with the resizing.
// Query the size of each element in the array.
const siz_t elem_size = bli_array_elem_size( array );
// Compute the total size (in bytes) of the array before and after resizing.
const size_t array_size_prev = num_elem_prev * elem_size;
const size_t array_size_new = num_elem_new * elem_size;
// Query the previous array buffer.
void* restrict buf_prev = bli_array_buf( array );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_array_resize(): allocating array [%d * %d]: ",
( int )num_elem_new, ( int )elem_size );
#endif
// Allocate a new array buffer.
char* restrict buf_new = bli_malloc_intl( array_size_new );
// Copy the previous array contents to the new array.
memcpy( buf_new, buf_prev, array_size_prev );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_array_resize(): freeing array [%d * %d]: ",
( int )num_elem_prev, ( int )elem_size );
#endif
// Now that the elements have been copied over to the new buffer, we can
// free the previous array buffer.
bli_free_intl( buf_prev );
// Initialize the new elements' contents to zero. (Note that we advance
// the new buffer address by the size of the previous array so that we
// arrive at the first byte of the new segment.)
memset( &buf_new[ array_size_prev ], 0, array_size_new - array_size_prev );
// Update the array_t structure.
// NOTE: The array elem_size field does not need updating.
bli_array_set_buf( buf_new, array );
bli_array_set_num_elem( num_elem_new, array );
}
void bli_array_finalize
(
array_t* restrict array
)
{
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_array_finalize(): freeing buf (length %d): ",
( int )bli_array_num_elem( array ) );
#endif
// Query the buffer from the array.
void* restrict buf = bli_array_buf( array );
// Free the buffer.
bli_free_intl( buf );
}
void* bli_array_elem
(
const siz_t index,
array_t* restrict array
)
{
// Query the number of elements in the array.
const siz_t num_elem = bli_array_num_elem( array );
// Sanity check: disallow access beyond the bounds of the array.
if ( num_elem <= index ) bli_abort();
// Query the size of each element in the array.
const siz_t elem_size = bli_array_elem_size( array );
// Query the buffer from the array, but store it as a char* so we can use
// it to easily perform byte pointer arithmetic.
char* restrict buf = bli_array_buf( array );
// Advance the pointer by (index * elem_size) bytes.
buf += index * elem_size;
// Return the address of the element computed above.
return ( void* )buf;
}
void bli_array_set_elem
(
void* restrict elem,
const siz_t index,
array_t* restrict array
)
{
// Query the size of each element in the array.
const siz_t elem_size = bli_array_elem_size( array );
// Query the buffer from the array as a char*.
char* restrict buf = bli_array_buf( array );
if ( elem_size == sizeof( void* ) )
{
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_array_set_elem(): elem_size is %d; setting index %d.\n",
( int )elem_size, ( int )index );
fflush( stdout );
#endif
// Special case: Handle elem_size = sizeof( void* ) without calling
// memcpy().
void** restrict buf_vvp = ( void** )buf;
void** restrict elem_vvp = ( void** )elem;
buf_vvp[ index ] = *elem_vvp;
}
else
{
// General case: Copy the elem_size bytes from elem to buf at the
// element index specified by index.
memcpy( &buf[ index * elem_size ], elem, ( size_t )elem_size );
}
}