From bfe214b633765ed40b57b330fbb84c332663aa40 Mon Sep 17 00:00:00 2001 From: Tyler Smith Date: Thu, 27 Feb 2014 15:53:10 -0600 Subject: [PATCH] Fixed bug with parallel packing, and bug with allocating an array of thread infos In packm variant 1, the variable p_begin was incremented each iteration, causing a dependency. This dependency was removed, allowing each iteration to be executed in parallel. Somewhere in bli_threading.c, I was allocating an array of pointers instead of an array of structs. --- frame/1m/packm/bli_packm_blk_var1.c | 10 ++++------ frame/base/bli_threading.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/frame/1m/packm/bli_packm_blk_var1.c b/frame/1m/packm/bli_packm_blk_var1.c index 93c164a6d..aa0c0e56b 100644 --- a/frame/1m/packm/bli_packm_blk_var1.c +++ b/frame/1m/packm/bli_packm_blk_var1.c @@ -187,9 +187,6 @@ void PASTEMAC(ch,varname )( \ to pack it. */ \ if ( bli_is_zeros( uploc ) && \ bli_is_triangular( strucc ) ) return; \ -\ - dim_t t_id = thread_id( thread ); \ - dim_t num_threads = thread_num_threads( thread ); \ \ /* Extract the conjugation bit from the transposition argument. */ \ conjc = bli_extract_conj( transc ); \ @@ -266,6 +263,9 @@ void PASTEMAC(ch,varname )( \ } \ \ p_begin = p_cast; \ + dim_t t_id = thread_id( thread ); \ + dim_t num_threads = thread_num_threads( thread ); \ +\ \ for ( ic = ic0 + t_id * ic_inc, ip = ip0 + t_id * ip_inc, it = t_id; it < num_iter; \ ic += num_threads * ic_inc, ip += num_threads * ip_inc, it += num_threads ) \ @@ -274,6 +274,7 @@ void PASTEMAC(ch,varname )( \ \ diagoffc_i = diagoffc + (ip )*diagoffc_inc; \ c_begin = c_cast + (ic )*vs_c; \ + p_begin = p_cast + (ip )*ps_p; \ \ if ( bli_is_triangular( strucc ) && \ bli_is_unstored_subpart_n( diagoffc_i, uploc, *m_panel_full, *n_panel_full ) ) \ @@ -389,9 +390,6 @@ void PASTEMAC(ch,varname )( \ BLIS_CONTIG_STRIDE_ALIGN_SIZE. 
*/ \ p_inc = ldp * panel_len_max_i; \ } \ -\ -\ - p_begin += p_inc; \ } \ \ \ diff --git a/frame/base/bli_threading.c b/frame/base/bli_threading.c index 77d948b77..55aa5ff2e 100644 --- a/frame/base/bli_threading.c +++ b/frame/base/bli_threading.c @@ -181,7 +181,7 @@ thrinfo_t* bli_create_thread_info( dim_t* caucuses_at_level, dim_t n_levels ) //Create communicators thread_comm_tree_t* comm_leaves = (thread_comm_tree_t*)bli_malloc( sizeof(thread_comm_tree_t) * n_threads); create_comms( caucuses_at_level, n_levels, 0, NULL, comm_leaves, 0 ); - thrinfo_t* info_paths = (thrinfo_t*)bli_malloc( sizeof(thrinfo_t*) * n_threads ); + thrinfo_t* info_paths = (thrinfo_t*)bli_malloc( sizeof(thrinfo_t) * n_threads ); //Now create paths upwards for( dim_t i = 0; i < n_threads; i++ ) @@ -209,7 +209,7 @@ thrinfo_t* bli_create_thread_info( dim_t* caucuses_at_level, dim_t n_levels ) bli_setup_thrinfo_t(cur, comm_node->comm, ocomm_id, prev, caucuses_at_level[n_levels - j - 1], caucus_id ); - cur = prev; + prev = cur; comm_node = comm_node->parent; } }