From 92233cf64274b27b2217c5cfffe75443ff6137a4 Mon Sep 17 00:00:00 2001 From: Tyler Smith Date: Tue, 11 Mar 2014 14:16:08 -0500 Subject: [PATCH] Some fixes to gemm thread info tree creation, Changed microkernel tests to use the new BLIS_PACKM_SINGLE_THREADED instead of BLIS_SINGLE_THREADED --- frame/1m/packm/bli_packm_blk_var1.c | 2 +- frame/3/gemm/bli_gemm_cntl.c | 2 +- frame/3/gemm/bli_gemm_threading.c | 15 ++++++++------- frame/base/bli_threading.c | 1 + testsuite/src/test_gemm_ukr.c | 4 ++-- testsuite/src/test_gemmtrsm_ukr.c | 6 +++--- testsuite/src/test_trsm_ukr.c | 4 ++-- 7 files changed, 18 insertions(+), 16 deletions(-) diff --git a/frame/1m/packm/bli_packm_blk_var1.c b/frame/1m/packm/bli_packm_blk_var1.c index 91cb8b335..d8c84425b 100644 --- a/frame/1m/packm/bli_packm_blk_var1.c +++ b/frame/1m/packm/bli_packm_blk_var1.c @@ -265,7 +265,7 @@ void PASTEMAC(ch,varname )( \ p_begin = p_cast; \ dim_t t_id = thread_id( thread ); \ dim_t num_threads = thread_num_threads( thread ); \ -\ + p_inc = ps_p; \ \ for ( ic = ic0 + t_id * ic_inc, ip = ip0 + t_id * ip_inc, it = t_id; it < num_iter; \ ic += num_threads * ic_inc, ip += num_threads * ip_inc, it += num_threads ) \ diff --git a/frame/3/gemm/bli_gemm_cntl.c b/frame/3/gemm/bli_gemm_cntl.c index 5c109f2dc..d10c2daf6 100644 --- a/frame/3/gemm/bli_gemm_cntl.c +++ b/frame/3/gemm/bli_gemm_cntl.c @@ -55,7 +55,7 @@ gemm_t* gemm_cntl_vl_mm; gemm_t* gemm_cntl; -dim_t gemm_caucuses_at_level[5] = {2, 1, 1, 1, 1}; +dim_t gemm_caucuses_at_level[5] = {1, 1, 2, 1, 1}; gemm_thrinfo_t* bli_gemm_cntl_get_thrinfos() { diff --git a/frame/3/gemm/bli_gemm_threading.c b/frame/3/gemm/bli_gemm_threading.c index 53511fcc0..5c0a337ff 100644 --- a/frame/3/gemm/bli_gemm_threading.c +++ b/frame/3/gemm/bli_gemm_threading.c @@ -86,7 +86,6 @@ gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ gemm_thrinfo_t* bli_create_gemm_thrinfo_paths( dim_t* threads_at_level, dim_t n_levels ) { - assert(n_levels == 5); dim_t jc_way = threads_at_level[0]; @@ -96,31 +95,33 @@ gemm_thrinfo_t* bli_create_gemm_thrinfo_paths( dim_t* threads_at_level, dim_t n_ dim_t ir_way = threads_at_level[4]; dim_t global_num_threads = jc_way * kc_way * ic_way * jr_way * ir_way; + assert( global_num_threads != 0 ); + dim_t jc_nt = kc_way * ic_way * jr_way * ir_way; dim_t kc_nt = ic_way * jr_way * ir_way; dim_t ic_nt = jr_way * ir_way; dim_t jr_nt = ir_way; dim_t ir_nt = 1; + gemm_thrinfo_t* paths = (gemm_thrinfo_t*) malloc( global_num_threads * sizeof( gemm_thrinfo_t ) ); thread_comm_t* global_comm = bli_create_communicator( global_num_threads ); - for( int a = 0; a < jc_nt; a++ ) + for( int a = 0; a < jc_way; a++ ) { thread_comm_t* jc_comm = bli_create_communicator( jc_nt ); - for( int b = 0; b < kc_nt; b++ ) + for( int b = 0; b < kc_way; b++ ) { thread_comm_t* kc_comm = bli_create_communicator( kc_nt ); - for( int c = 0; c < ic_nt; c++ ) + for( int c = 0; c < ic_way; c++ ) { thread_comm_t* ic_comm = bli_create_communicator( ic_nt ); - for( int d = 0; d < jr_nt; d++ ) + for( int d = 0; d < jr_way; d++ ) { thread_comm_t* jr_comm = bli_create_communicator( jr_nt ); - for( int e = 0; e < jc_nt; e++) + for( int e = 0; e < ir_way; e++) { thread_comm_t* ir_comm = bli_create_communicator( ir_nt ); - dim_t ir_comm_id = 0; dim_t jr_comm_id = e*ir_nt + ir_comm_id; dim_t ic_comm_id = d*jr_nt + jr_comm_id; diff --git a/frame/base/bli_threading.c b/frame/base/bli_threading.c index 9d70a1900..f830ebc2d 100644 --- a/frame/base/bli_threading.c +++ b/frame/base/bli_threading.c @@ -214,6 +214,7 @@ thrinfo_t* bli_create_thread_info( dim_t* caucuses_at_level, dim_t n_levels ) void bli_get_range( void* thr, dim_t size, dim_t block_factor, dim_t* start, dim_t* end ) { thrinfo_t* thread = (thrinfo_t*) thr; + dim_t n_way = thread->n_way; dim_t work_id = thread->work_id; dim_t n_pt = size / n_way; diff --git a/testsuite/src/test_gemm_ukr.c b/testsuite/src/test_gemm_ukr.c index fc73eea86..7dbae77e2 100644 --- a/testsuite/src/test_gemm_ukr.c +++ b/testsuite/src/test_gemm_ukr.c @@ -221,8 +221,8 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params, &b, &bp ); // Pack the contents of a and b to ap and bp, respectively. - bli_packm_blk_var1( &a, &ap, &BLIS_SINGLE_THREADED ); - bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED ); + bli_packm_blk_var1( &a, &ap, &BLIS_PACKM_SINGLE_THREADED ); + bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED ); // Repeat the experiment n_repeats times and record results. diff --git a/testsuite/src/test_gemmtrsm_ukr.c b/testsuite/src/test_gemmtrsm_ukr.c index f40d54eb5..f1e05572f 100644 --- a/testsuite/src/test_gemmtrsm_ukr.c +++ b/testsuite/src/test_gemmtrsm_ukr.c @@ -251,10 +251,10 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, &b, &bp ); // Pack the contents of a to ap. - bli_packm_blk_var1( &a, &ap, &BLIS_SINGLE_THREADED ); + bli_packm_blk_var1( &a, &ap, &BLIS_PACKM_SINGLE_THREADED ); // Pack the contents of b to bp. - bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED ); + bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED ); // Create subpartitions from the a and b panels. @@ -268,7 +268,7 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, bli_copym( &c11_save, &c11 ); // Re-pack the contents of b to bp. - bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED ); + bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED ); time = bli_clock(); diff --git a/testsuite/src/test_trsm_ukr.c b/testsuite/src/test_trsm_ukr.c index 2262b0a0d..86764c7d7 100644 --- a/testsuite/src/test_trsm_ukr.c +++ b/testsuite/src/test_trsm_ukr.c @@ -217,14 +217,14 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params, &b, &bp ); // Pack the contents of a to ap. - bli_packm_blk_var1( &a, &ap, &BLIS_SINGLE_THREADED ); + bli_packm_blk_var1( &a, &ap, &BLIS_PACKM_SINGLE_THREADED ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { // Re-pack the contents of b to bp. - bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED ); + bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED ); bli_copym( &c_save, &c );