mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Fixed bug with parallel packing, and bug with allocating an array of thread infos
In packm variant 1, the variable p_begin was incremented on each iteration, creating a loop-carried dependency. This dependency was removed, allowing each iteration to be executed in parallel. Somewhere in bli_threading.c, I was allocating an array of pointers instead of an array of structs.
This commit is contained in:
@@ -187,9 +187,6 @@ void PASTEMAC(ch,varname )( \
|
||||
to pack it. */ \
|
||||
if ( bli_is_zeros( uploc ) && \
|
||||
bli_is_triangular( strucc ) ) return; \
|
||||
\
|
||||
dim_t t_id = thread_id( thread ); \
|
||||
dim_t num_threads = thread_num_threads( thread ); \
|
||||
\
|
||||
/* Extract the conjugation bit from the transposition argument. */ \
|
||||
conjc = bli_extract_conj( transc ); \
|
||||
@@ -266,6 +263,9 @@ void PASTEMAC(ch,varname )( \
|
||||
} \
|
||||
\
|
||||
p_begin = p_cast; \
|
||||
dim_t t_id = thread_id( thread ); \
|
||||
dim_t num_threads = thread_num_threads( thread ); \
|
||||
\
|
||||
\
|
||||
for ( ic = ic0 + t_id * ic_inc, ip = ip0 + t_id * ip_inc, it = t_id; it < num_iter; \
|
||||
ic += num_threads * ic_inc, ip += num_threads * ip_inc, it += num_threads ) \
|
||||
@@ -274,6 +274,7 @@ void PASTEMAC(ch,varname )( \
|
||||
\
|
||||
diagoffc_i = diagoffc + (ip )*diagoffc_inc; \
|
||||
c_begin = c_cast + (ic )*vs_c; \
|
||||
p_begin = p_cast + (ip )*ps_p; \
|
||||
\
|
||||
if ( bli_is_triangular( strucc ) && \
|
||||
bli_is_unstored_subpart_n( diagoffc_i, uploc, *m_panel_full, *n_panel_full ) ) \
|
||||
@@ -389,9 +390,6 @@ void PASTEMAC(ch,varname )( \
|
||||
BLIS_CONTIG_STRIDE_ALIGN_SIZE. */ \
|
||||
p_inc = ldp * panel_len_max_i; \
|
||||
} \
|
||||
\
|
||||
\
|
||||
p_begin += p_inc; \
|
||||
} \
|
||||
\
|
||||
\
|
||||
|
||||
@@ -181,7 +181,7 @@ thrinfo_t* bli_create_thread_info( dim_t* caucuses_at_level, dim_t n_levels )
|
||||
//Create communicators
|
||||
thread_comm_tree_t* comm_leaves = (thread_comm_tree_t*)bli_malloc( sizeof(thread_comm_tree_t) * n_threads);
|
||||
create_comms( caucuses_at_level, n_levels, 0, NULL, comm_leaves, 0 );
|
||||
thrinfo_t* info_paths = (thrinfo_t*)bli_malloc( sizeof(thrinfo_t*) * n_threads );
|
||||
thrinfo_t* info_paths = (thrinfo_t*)bli_malloc( sizeof(thrinfo_t) * n_threads );
|
||||
|
||||
//Now create paths upwards
|
||||
for( dim_t i = 0; i < n_threads; i++ )
|
||||
@@ -209,7 +209,7 @@ thrinfo_t* bli_create_thread_info( dim_t* caucuses_at_level, dim_t n_levels )
|
||||
bli_setup_thrinfo_t(cur, comm_node->comm, ocomm_id,
|
||||
prev, caucuses_at_level[n_levels - j - 1], caucus_id );
|
||||
|
||||
cur = prev;
|
||||
prev = cur;
|
||||
comm_node = comm_node->parent;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user