Merge commit 'b683d01b' into amd-main

* commit 'b683d01b':
  Use extra #undef when including ba/ex API headers.
  Minor preprocessor/header cleanup.
  Fixed typo in cpp guard in bli_util_ft.h.
  Defined eqsc, eqv, eqm to test object equality.
  Defined setijv, getijv to set/get vector elements.
  Minor API breakage in bli_pack API.
  Add err_t* "return" parameter to malloc functions.
  Always stay initialized after BLAS compat calls.
  Renamed membrk files/vars/functions to pba.
  Switch allocator mutexes to static initialization.

AMD-Internal: [CPUPL-2698]
Change-Id: Ied2ca8619f144d4b8a7123ac45a1be0dda3875df
This commit is contained in:
Edward Smyth
2023-08-21 06:58:49 -04:00
128 changed files with 2860 additions and 1467 deletions

View File

@@ -79,7 +79,7 @@ void PASTECH2(bls_,ch,opname) \
the current function before the other threads have a chance to
copy from it. (A barrier would fix that race condition, but then
again, I prefer to keep barriers to a minimum.) */ \
bli_membrk_acquire_m \
bli_pba_acquire_m \
( \
rntm, \
size_needed, \
@@ -123,12 +123,12 @@ void PASTECH2(bls_,ch,opname) \
above for why the acquisition needs to be directly to
the chief thread's passed-in mem_t and not a local
(temporary) mem_t. */ \
bli_membrk_release \
bli_pba_release \
( \
rntm, \
mem \
); \
bli_membrk_acquire_m \
bli_pba_acquire_m \
( \
rntm, \
size_needed, \
@@ -182,7 +182,7 @@ void PASTECH2(bls_,ch,opname) \
is allocated, which it should be. */ \
if ( bli_mem_is_alloc( mem ) ) \
{ \
bli_membrk_release \
bli_pba_release \
( \
rntm, \
mem \

View File

@@ -79,7 +79,7 @@ void PASTECH2(bls_,ch,opname) \
the current function before the other threads have a chance to
copy from it. (A barrier would fix that race condition, but then
again, I prefer to keep barriers to a minimum.) */ \
bli_membrk_acquire_m \
bli_pba_acquire_m \
( \
rntm, \
size_needed, \
@@ -123,12 +123,12 @@ void PASTECH2(bls_,ch,opname) \
above for why the acquisition needs to be directly to
the chief thread's passed-in mem_t and not a local
(temporary) mem_t. */ \
bli_membrk_release \
bli_pba_release \
( \
rntm, \
mem \
); \
bli_membrk_acquire_m \
bli_pba_acquire_m \
( \
rntm, \
size_needed, \
@@ -182,7 +182,7 @@ void PASTECH2(bls_,ch,opname) \
is allocated, which it should be. */ \
if ( bli_mem_is_alloc( mem ) ) \
{ \
bli_membrk_release \
bli_pba_release \
( \
rntm, \
mem \

View File

@@ -75,7 +75,7 @@ void bls_l3_thread_decorator
// Set the packing block allocator field of the rntm. This will be
// inherited by all of the child threads when they make local copies of
// the rntm below.
bli_membrk_rntm_set_membrk( rntm );
bli_pba_rntm_set_pba( rntm );
// Allocate a global communicator for the root thrinfo_t structures.
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );

View File

@@ -140,7 +140,7 @@ void bls_l3_thread_decorator
// Set the packing block allocator field of the rntm. This will be
// inherited by all of the child threads when they make local copies of
// the rntm below.
bli_membrk_rntm_set_membrk( rntm );
bli_pba_rntm_set_pba( rntm );
// Allocate a global communicator for the root thrinfo_t structures.
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );

View File

@@ -68,7 +68,7 @@ void bls_l3_thread_decorator
bli_sba_rntm_set_pool( 0, array, rntm );
// Set the packing block allocator field of the rntm.
bli_membrk_rntm_set_membrk( rntm );
bli_pba_rntm_set_pba( rntm );
#ifndef SKIP_THRINFO_TREE
// Allocate a global communicator for the root thrinfo_t structures.

View File

@@ -45,7 +45,7 @@ void blx_l3_packm
thrinfo_t* thread
)
{
membrk_t* membrk;
pba_t* pba;
packbuf_t pack_buf_type;
mem_t* cntl_mem_p;
siz_t size_needed;
@@ -71,7 +71,7 @@ void blx_l3_packm
if ( size_needed == 0 ) return;
// Query the packing block allocator (pba) from the context.
membrk = bli_cntx_get_membrk( cntx );
pba = bli_cntx_get_pba( cntx );
// Query the pack buffer type from the control tree node.
pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
@@ -91,9 +91,9 @@ void blx_l3_packm
{
// The chief thread acquires a block from the packing block allocator
// and saves the associated mem_t entry to local_mem_s.
bli_membrk_acquire_m
bli_pba_acquire_m
(
membrk,
pba,
size_needed,
pack_buf_type,
&local_mem_s
@@ -130,10 +130,10 @@ void blx_l3_packm
// The chief thread releases the existing block associated with
// the mem_t entry in the control tree, and then re-acquires a
// new block, saving the associated mem_t entry to local_mem_s.
bli_membrk_release( cntl_mem_p );
bli_membrk_acquire_m
bli_pba_release( cntl_mem_p );
bli_pba_acquire_m
(
membrk,
pba,
size_needed,
pack_buf_type,
&local_mem_s

View File

@@ -147,7 +147,7 @@ void blx_gemm_thread
// Set the packing block allocator field of the rntm. This will be
// inherited by all of the child threads when they make local copies of
// the rntm below.
bli_membrk_rntm_set_membrk( rntm );
bli_pba_rntm_set_pba( rntm );
// Allocate a global communicator for the root thrinfo_t structures.
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );