Renamed bli_thread_obarrier(), bli_thread_obroadcast().

Details:
- Renamed two bli_thread_*() APIs:
    bli_thread_obarrier()   -> bli_thread_barrier()
    bli_thread_obroadcast() -> bli_thread_broadcast()
  The 'o' was a leftover from when thrcomm_t objects tracked both
  "inner" and "outer" communicators. They have long since been
  simplified to support only the "outer" communicator, and thus the
  'o' prefix is superfluous.
This commit is contained in:
Field G. Van Zee
2020-02-25 14:50:53 -06:00
parent f6e6bf73e6
commit c01d249d7c
13 changed files with 35 additions and 35 deletions

View File

@@ -667,7 +667,7 @@ if ( col_stored ) { \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \ fflush( stdout ); \
} \ } \
bli_thread_obarrier( thread ); \ bli_thread_barrier( thread ); \
if ( bli_thread_work_id( thread ) == 1 ) \ if ( bli_thread_work_id( thread ) == 1 ) \
{ \ { \
printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
@@ -678,7 +678,7 @@ bli_thread_obarrier( thread ); \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \ fflush( stdout ); \
} \ } \
bli_thread_obarrier( thread ); \ bli_thread_barrier( thread ); \
} \ } \
else { \ else { \
if ( bli_thread_work_id( thread ) == 0 ) \ if ( bli_thread_work_id( thread ) == 0 ) \
@@ -691,7 +691,7 @@ else { \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \ fflush( stdout ); \
} \ } \
bli_thread_obarrier( thread ); \ bli_thread_barrier( thread ); \
if ( bli_thread_work_id( thread ) == 1 ) \ if ( bli_thread_work_id( thread ) == 1 ) \
{ \ { \
printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
@@ -702,7 +702,7 @@ bli_thread_obarrier( thread ); \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \ fflush( stdout ); \
} \ } \
bli_thread_obarrier( thread ); \ bli_thread_barrier( thread ); \
} \ } \
*/ */
/* /*

View File

@@ -73,6 +73,6 @@ void bli_unpackm_int
} }
// Barrier so that unpacking is done before computation. // Barrier so that unpacking is done before computation.
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
} }

View File

@@ -50,7 +50,7 @@ void bli_l3_packm
siz_t size_needed; siz_t size_needed;
// FGVZ: Not sure why we need this barrier, but we do. // FGVZ: Not sure why we need this barrier, but we do.
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
// Every thread initializes x_pack and determines the size of memory // Every thread initializes x_pack and determines the size of memory
// block needed (which gets embedded into the otherwise "blank" mem_t // block needed (which gets embedded into the otherwise "blank" mem_t
@@ -102,7 +102,7 @@ void bli_l3_packm
// Broadcast the address of the chief thread's local mem_t entry to // Broadcast the address of the chief thread's local mem_t entry to
// all threads. // all threads.
local_mem_p = bli_thread_obroadcast( thread, &local_mem_s ); local_mem_p = bli_thread_broadcast( thread, &local_mem_s );
// Save the contents of the chief thread's local mem_t entry to the // Save the contents of the chief thread's local mem_t entry to the
// mem_t field in this thread's control tree node. // mem_t field in this thread's control tree node.
@@ -146,7 +146,7 @@ void bli_l3_packm
// Broadcast the address of the chief thread's local mem_t entry to // Broadcast the address of the chief thread's local mem_t entry to
// all threads. // all threads.
local_mem_p = bli_thread_obroadcast( thread, &local_mem_s ); local_mem_p = bli_thread_broadcast( thread, &local_mem_s );
// Save the chief thread's local mem_t entry to the mem_t field in // Save the chief thread's local mem_t entry to the mem_t field in
// this thread's control tree node. // this thread's control tree node.
@@ -159,7 +159,7 @@ void bli_l3_packm
// will already have the cached values in their local control // will already have the cached values in their local control
// trees' mem_t entries, currently pointed to by cntl_mem_p. // trees' mem_t entries, currently pointed to by cntl_mem_p.
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
} }
} }
@@ -182,6 +182,6 @@ void bli_l3_packm
); );
// Barrier so that packing is done before computation. // Barrier so that packing is done before computation.
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
} }

View File

@@ -237,7 +237,7 @@ if ( col_stored ) { \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \ fflush( stdout ); \
} \ } \
bli_thread_obarrier( thread ); \ bli_thread_barrier( thread ); \
if ( bli_thread_work_id( thread ) == 1 ) \ if ( bli_thread_work_id( thread ) == 1 ) \
{ \ { \
printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
@@ -248,7 +248,7 @@ bli_thread_obarrier( thread ); \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \ fflush( stdout ); \
} \ } \
bli_thread_obarrier( thread ); \ bli_thread_barrier( thread ); \
} \ } \
else { \ else { \
if ( bli_thread_work_id( thread ) == 0 ) \ if ( bli_thread_work_id( thread ) == 0 ) \
@@ -261,7 +261,7 @@ else { \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \ fflush( stdout ); \
} \ } \
bli_thread_obarrier( thread ); \ bli_thread_barrier( thread ); \
if ( bli_thread_work_id( thread ) == 1 ) \ if ( bli_thread_work_id( thread ) == 1 ) \
{ \ { \
printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
@@ -272,7 +272,7 @@ bli_thread_obarrier( thread ); \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \ fflush( stdout ); \
} \ } \
bli_thread_obarrier( thread ); \ bli_thread_barrier( thread ); \
} \ } \
*/ */
/* /*

View File

@@ -84,7 +84,7 @@ void bli_gemm_blk_var3
bli_thrinfo_sub_node( thread ) bli_thrinfo_sub_node( thread )
); );
bli_thread_obarrier( bli_thrinfo_sub_node( thread ) ); bli_thread_barrier( bli_thrinfo_sub_node( thread ) );
// This variant executes multiple rank-k updates. Therefore, if the // This variant executes multiple rank-k updates. Therefore, if the
// internal beta scalar on matrix C is non-zero, we must use it // internal beta scalar on matrix C is non-zero, we must use it

View File

@@ -66,7 +66,7 @@ void bli_gemm_int
{ {
if ( bli_thread_am_ochief( thread ) ) if ( bli_thread_am_ochief( thread ) )
bli_scalm( beta, c ); bli_scalm( beta, c );
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
return; return;
} }
@@ -80,7 +80,7 @@ void bli_gemm_int
if ( bli_thread_am_ochief( thread ) ) if ( bli_thread_am_ochief( thread ) )
bli_scalm( beta, c ); bli_scalm( beta, c );
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
return; return;
} }

View File

@@ -117,7 +117,7 @@ void bli_trsm_blk_var1
// We must execute a barrier here because the upcoming rank-k update // We must execute a barrier here because the upcoming rank-k update
// requires the packed matrix B to be fully updated by the trsm // requires the packed matrix B to be fully updated by the trsm
// subproblem. // subproblem.
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
// Isolate the remaining part of the column panel matrix A, which we do by // Isolate the remaining part of the column panel matrix A, which we do by
// acquiring the subpartition ahead of A11 (that is, A21 or A01, depending // acquiring the subpartition ahead of A11 (that is, A21 or A01, depending

View File

@@ -85,7 +85,7 @@ void bli_trsm_blk_var3
); );
//bli_thread_ibarrier( thread ); //bli_thread_ibarrier( thread );
bli_thread_obarrier( bli_thrinfo_sub_node( thread ) ); bli_thread_barrier( bli_thrinfo_sub_node( thread ) );
// This variant executes multiple rank-k updates. Therefore, if the // This variant executes multiple rank-k updates. Therefore, if the
// internal alpha scalars on A/B and C are non-zero, we must ensure // internal alpha scalars on A/B and C are non-zero, we must ensure

View File

@@ -68,7 +68,7 @@ void bli_trsm_int
{ {
if ( bli_thread_am_ochief( thread ) ) if ( bli_thread_am_ochief( thread ) )
bli_scalm( beta, c ); bli_scalm( beta, c );
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
return; return;
} }
@@ -119,7 +119,7 @@ void bli_trsm_int
} }
// FGVZ->TMS: Is this barrier still needed? // FGVZ->TMS: Is this barrier still needed?
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
// Create the next node in the thrinfo_t structure. // Create the next node in the thrinfo_t structure.
bli_thrinfo_grow( rntm, cntl, thread ); bli_thrinfo_grow( rntm, cntl, thread );

View File

@@ -340,7 +340,7 @@ thrinfo_t* bli_thrinfo_create_for_cntl
// Broadcast the temporary array to all threads in the parent's // Broadcast the temporary array to all threads in the parent's
// communicator. // communicator.
new_comms = bli_thread_obroadcast( thread_par, new_comms ); new_comms = bli_thread_broadcast( thread_par, new_comms );
// Chiefs in the child communicator allocate the communicator // Chiefs in the child communicator allocate the communicator
// object and store it in the array element corresponding to the // object and store it in the array element corresponding to the
@@ -348,7 +348,7 @@ thrinfo_t* bli_thrinfo_create_for_cntl
if ( child_comm_id == 0 ) if ( child_comm_id == 0 )
new_comms[ parent_work_id ] = bli_thrcomm_create( rntm, child_nt_in ); new_comms[ parent_work_id ] = bli_thrcomm_create( rntm, child_nt_in );
bli_thread_obarrier( thread_par ); bli_thread_barrier( thread_par );
// All threads create a new thrinfo_t node using the communicator // All threads create a new thrinfo_t node using the communicator
// that was created by their chief, as identified by parent_work_id. // that was created by their chief, as identified by parent_work_id.
@@ -364,7 +364,7 @@ thrinfo_t* bli_thrinfo_create_for_cntl
NULL // sub_node NULL // sub_node
); );
bli_thread_obarrier( thread_par ); bli_thread_barrier( thread_par );
// The parent's chief thread frees the temporary array of thrcomm_t // The parent's chief thread frees the temporary array of thrcomm_t
// pointers. // pointers.
@@ -477,7 +477,7 @@ thrinfo_t* bli_thrinfo_create_for_cntl_prenode
const dim_t child_comm_id = parent_comm_id % child_nt_in; const dim_t child_comm_id = parent_comm_id % child_nt_in;
const dim_t child_work_id = child_comm_id / ( child_nt_in / child_n_way ); const dim_t child_work_id = child_comm_id / ( child_nt_in / child_n_way );
bli_thread_obarrier( thread_par ); bli_thread_barrier( thread_par );
// NOTE: Recall that parent_comm_id == child_comm_id, so checking for the // NOTE: Recall that parent_comm_id == child_comm_id, so checking for the
// parent's chief-ness is equivalent to checking for chief-ness in the new // parent's chief-ness is equivalent to checking for chief-ness in the new
@@ -488,7 +488,7 @@ thrinfo_t* bli_thrinfo_create_for_cntl_prenode
// Broadcast the new thrcomm_t address to the other threads in the // Broadcast the new thrcomm_t address to the other threads in the
// parent's group. // parent's group.
new_comm = bli_thread_obroadcast( thread_par, new_comm ); new_comm = bli_thread_broadcast( thread_par, new_comm );
// All threads create a new thrinfo_t node using the communicator // All threads create a new thrinfo_t node using the communicator
// that was created by their chief, as identified by parent_work_id. // that was created by their chief, as identified by parent_work_id.
@@ -504,7 +504,7 @@ thrinfo_t* bli_thrinfo_create_for_cntl_prenode
NULL // sub_node NULL // sub_node
); );
bli_thread_obarrier( thread_par ); bli_thread_barrier( thread_par );
return thread_chl; return thread_chl;
} }

View File

@@ -141,12 +141,12 @@ static void bli_thrinfo_set_sub_prenode( thrinfo_t* sub_prenode, thrinfo_t* t )
// other thrinfo_t-related functions // other thrinfo_t-related functions
static void* bli_thread_obroadcast( thrinfo_t* t, void* p ) static void* bli_thread_broadcast( thrinfo_t* t, void* p )
{ {
return bli_thrcomm_bcast( t->ocomm_id, p, t->ocomm ); return bli_thrcomm_bcast( t->ocomm_id, p, t->ocomm );
} }
static void bli_thread_obarrier( thrinfo_t* t ) static void bli_thread_barrier( thrinfo_t* t )
{ {
bli_thrcomm_barrier( t->ocomm_id, t->ocomm ); bli_thrcomm_barrier( t->ocomm_id, t->ocomm );
} }

View File

@@ -51,7 +51,7 @@ void blx_l3_packm
siz_t size_needed; siz_t size_needed;
// FGVZ: Not sure why we need this barrier, but we do. // FGVZ: Not sure why we need this barrier, but we do.
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
// Every thread initializes x_pack and determines the size of memory // Every thread initializes x_pack and determines the size of memory
// block needed (which gets embedded into the otherwise "blank" mem_t // block needed (which gets embedded into the otherwise "blank" mem_t
@@ -102,7 +102,7 @@ void blx_l3_packm
// Broadcast the address of the chief thread's local mem_t entry to // Broadcast the address of the chief thread's local mem_t entry to
// all threads. // all threads.
local_mem_p = bli_thread_obroadcast( thread, &local_mem_s ); local_mem_p = bli_thread_broadcast( thread, &local_mem_s );
// Save the contents of the chief thread's local mem_t entry to the // Save the contents of the chief thread's local mem_t entry to the
// mem_t field in this thread's control tree node. // mem_t field in this thread's control tree node.
@@ -142,7 +142,7 @@ void blx_l3_packm
// Broadcast the address of the chief thread's local mem_t entry to // Broadcast the address of the chief thread's local mem_t entry to
// all threads. // all threads.
local_mem_p = bli_thread_obroadcast( thread, &local_mem_s ); local_mem_p = bli_thread_broadcast( thread, &local_mem_s );
// Save the chief thread's local mem_t entry to the mem_t field in // Save the chief thread's local mem_t entry to the mem_t field in
// this thread's control tree node. // this thread's control tree node.
@@ -155,7 +155,7 @@ void blx_l3_packm
// will already have the cached values in their local control // will already have the cached values in their local control
// trees' mem_t entries, currently pointed to by cntl_mem_p. // trees' mem_t entries, currently pointed to by cntl_mem_p.
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
} }
} }
@@ -178,6 +178,6 @@ void blx_l3_packm
); );
// Barrier so that packing is done before computation. // Barrier so that packing is done before computation.
bli_thread_obarrier( thread ); bli_thread_barrier( thread );
} }

View File

@@ -73,7 +73,7 @@ void blx_gemm_blk_var3
bli_thrinfo_sub_node( thread ) bli_thrinfo_sub_node( thread )
); );
bli_thread_obarrier( bli_thrinfo_sub_node( thread ) ); bli_thread_barrier( bli_thrinfo_sub_node( thread ) );
// This variant executes multiple rank-k updates. Therefore, if the // This variant executes multiple rank-k updates. Therefore, if the
// internal beta scalar on matrix C is non-zero, we must use it // internal beta scalar on matrix C is non-zero, we must use it