diff --git a/config/mic/bli_config.h b/config/mic/bli_config.h index 637e71f74..688ed75ec 100644 --- a/config/mic/bli_config.h +++ b/config/mic/bli_config.h @@ -36,6 +36,9 @@ #define BLIS_CONFIG_H +#define BLIS_TREE_BARRIER +#define BLIS_TREE_BARRIER_ARITY 4 + // -- OPERATING SYSTEM --------------------------------------------------------- diff --git a/frame/1m/packm/bli_packm_unb_var1.c b/frame/1m/packm/bli_packm_unb_var1.c index c7d85a78a..b344b93d6 100644 --- a/frame/1m/packm/bli_packm_unb_var1.c +++ b/frame/1m/packm/bli_packm_unb_var1.c @@ -56,7 +56,8 @@ static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1); void bli_packm_unb_var1( obj_t* c, - obj_t* p ) + obj_t* p, + packm_thrinfo_t* thread ) { num_t dt_cp = bli_obj_datatype( *c ); @@ -98,20 +99,22 @@ void bli_packm_unb_var1( obj_t* c, // function pointer. f = ftypes[dt_cp]; - // Invoke the function. - f( strucc, - diagoffc, - diagc, - uploc, - transc, - densify, - m_p, - n_p, - m_max_p, - n_max_p, - buf_kappa, - buf_c, rs_c, cs_c, - buf_p, rs_p, cs_p ); + if( thread_am_ochief( thread ) ) { + // Invoke the function. + f( strucc, + diagoffc, + diagc, + uploc, + transc, + densify, + m_p, + n_p, + m_max_p, + n_max_p, + buf_kappa, + buf_c, rs_c, cs_c, + buf_p, rs_p, cs_p ); + } } diff --git a/frame/1m/packm/bli_packm_unb_var1.h b/frame/1m/packm/bli_packm_unb_var1.h index 1f4c451bf..25e95994e 100644 --- a/frame/1m/packm/bli_packm_unb_var1.h +++ b/frame/1m/packm/bli_packm_unb_var1.h @@ -33,7 +33,8 @@ */ void bli_packm_unb_var1( obj_t* c, - obj_t* p ); + obj_t* p, + packm_thrinfo_t* thread ); #undef GENTPROT diff --git a/frame/1m/unpackm/bli_unpackm_int.c b/frame/1m/unpackm/bli_unpackm_int.c index f94bbb423..70a520403 100644 --- a/frame/1m/unpackm/bli_unpackm_int.c +++ b/frame/1m/unpackm/bli_unpackm_int.c @@ -49,7 +49,8 @@ static FUNCPTR_T vars[2][3] = void bli_unpackm_int( obj_t* p, obj_t* a, - unpackm_t* cntl ) + unpackm_t* cntl, + packm_thrinfo_t* thread ) { // The unpackm operation consists of an optional post-process: castm. // (This post-process is analogous to the castm pre-process in packm.) @@ -122,9 +123,12 @@ void bli_unpackm_int( obj_t* p, f = vars[n][i]; // Invoke the variant. - f( p, - &c, - cntl ); + if( thread_am_ochief( thread ) ) { + f( p, + &c, + cntl ); + } + thread_obarrier( thread ); // Now, if necessary, we cast the contents of c to matrix a. If casting // was not necessary, then we are done because the call to the unpackm diff --git a/frame/1m/unpackm/bli_unpackm_int.h b/frame/1m/unpackm/bli_unpackm_int.h index 11960817c..89b8489f6 100644 --- a/frame/1m/unpackm/bli_unpackm_int.h +++ b/frame/1m/unpackm/bli_unpackm_int.h @@ -34,7 +34,8 @@ void bli_unpackm_int( obj_t* p, obj_t* a, - unpackm_t* cntl ); + unpackm_t* cntl, + packm_thrinfo_t* thread ); /* void bli_unpackm_init_cast( obj_t* p, diff --git a/frame/2/ger/bli_ger_blk_var1.c b/frame/2/ger/bli_ger_blk_var1.c index e22c69fd9..77b6ace11 100644 --- a/frame/2/ger/bli_ger_blk_var1.c +++ b/frame/2/ger/bli_ger_blk_var1.c @@ -91,7 +91,8 @@ void bli_ger_blk_var1( obj_t* alpha, // Copy/unpack A1 (if A1 was packed). bli_unpackm_int( &a1_pack, &a1, - cntl_sub_unpackm_a( cntl ) ); + cntl_sub_unpackm_a( cntl ), + &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/2/ger/bli_ger_blk_var2.c b/frame/2/ger/bli_ger_blk_var2.c index a1a5eeb45..6405497aa 100644 --- a/frame/2/ger/bli_ger_blk_var2.c +++ b/frame/2/ger/bli_ger_blk_var2.c @@ -91,7 +91,8 @@ void bli_ger_blk_var2( obj_t* alpha, // Copy/unpack A1 (if A1 was packed). bli_unpackm_int( &a1_pack, &a1, - cntl_sub_unpackm_a( cntl ) ); + cntl_sub_unpackm_a( cntl ), + &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/2/her/bli_her_blk_var1.c b/frame/2/her/bli_her_blk_var1.c index 7121ff0b1..45fc9c1d4 100644 --- a/frame/2/her/bli_her_blk_var1.c +++ b/frame/2/her/bli_her_blk_var1.c @@ -113,7 +113,8 @@ void bli_her_blk_var1( conj_t conjh, // Copy/unpack C11 (if C11 was packed). bli_unpackm_int( &c11_pack, &c11, - cntl_sub_unpackm_c11( cntl ) ); + cntl_sub_unpackm_c11( cntl ), + &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/2/her/bli_her_blk_var2.c b/frame/2/her/bli_her_blk_var2.c index b9bf2154c..a856269b0 100644 --- a/frame/2/her/bli_her_blk_var2.c +++ b/frame/2/her/bli_her_blk_var2.c @@ -113,7 +113,8 @@ void bli_her_blk_var2( conj_t conjh, // Copy/unpack C11 (if C11 was packed). bli_unpackm_int( &c11_pack, &c11, - cntl_sub_unpackm_c11( cntl ) ); + cntl_sub_unpackm_c11( cntl ), + &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/2/her2/bli_her2_blk_var1.c b/frame/2/her2/bli_her2_blk_var1.c index 645b9de79..af15b674f 100644 --- a/frame/2/her2/bli_her2_blk_var1.c +++ b/frame/2/her2/bli_her2_blk_var1.c @@ -137,7 +137,8 @@ void bli_her2_blk_var1( conj_t conjh, // Copy/unpack C11 (if C11 was packed). bli_unpackm_int( &c11_pack, &c11, - cntl_sub_unpackm_c11( cntl ) ); + cntl_sub_unpackm_c11( cntl ), + &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/2/her2/bli_her2_blk_var2.c b/frame/2/her2/bli_her2_blk_var2.c index d6876de3e..d57da2bff 100644 --- a/frame/2/her2/bli_her2_blk_var2.c +++ b/frame/2/her2/bli_her2_blk_var2.c @@ -140,7 +140,8 @@ void bli_her2_blk_var2( conj_t conjh, // Copy/unpack C11 (if C11 was packed). bli_unpackm_int( &c11_pack, &c11, - cntl_sub_unpackm_c11( cntl ) ); + cntl_sub_unpackm_c11( cntl ), + &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/2/her2/bli_her2_blk_var3.c b/frame/2/her2/bli_her2_blk_var3.c index 7e84b5830..8270f8dff 100644 --- a/frame/2/her2/bli_her2_blk_var3.c +++ b/frame/2/her2/bli_her2_blk_var3.c @@ -140,7 +140,8 @@ void bli_her2_blk_var3( conj_t conjh, // Copy/unpack C11 (if C11 was packed). bli_unpackm_int( &c11_pack, &c11, - cntl_sub_unpackm_c11( cntl ) ); + cntl_sub_unpackm_c11( cntl ), + &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/2/her2/bli_her2_blk_var4.c b/frame/2/her2/bli_her2_blk_var4.c index 4760606f9..77b750230 100644 --- a/frame/2/her2/bli_her2_blk_var4.c +++ b/frame/2/her2/bli_her2_blk_var4.c @@ -137,7 +137,8 @@ void bli_her2_blk_var4( conj_t conjh, // Copy/unpack C11 (if C11 was packed). bli_unpackm_int( &c11_pack, &c11, - cntl_sub_unpackm_c11( cntl ) ); + cntl_sub_unpackm_c11( cntl ), + &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/3/gemm/bli_gemm_blk_var1f.c b/frame/3/gemm/bli_gemm_blk_var1f.c index 29c4670af..a1b93eb1a 100644 --- a/frame/3/gemm/bli_gemm_blk_var1f.c +++ b/frame/3/gemm/bli_gemm_blk_var1f.c @@ -131,13 +131,9 @@ void bli_gemm_blk_var1f( obj_t* a, // Unpack C1 (if C1 was packed). // Currently must be done by 1 thread - if( thread_am_ichief( thread ) ) { - bli_unpackm_int( c1_pack, &c1, - cntl_sub_unpackm_c( cntl ) ); - } - //Barrier to make sure unpacking is done before next iteration's packing of C - //Somehow, we'd like to make this a noop if packing isn't done. - thread_ibarrier( thread ); + bli_unpackm_int( c1_pack, &c1, + cntl_sub_unpackm_c( cntl ), + gemm_thread_sub_ipackm( thread ) ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/3/gemm/bli_gemm_blk_var2f.c b/frame/3/gemm/bli_gemm_blk_var2f.c index dd8a073d4..61ea352b9 100644 --- a/frame/3/gemm/bli_gemm_blk_var2f.c +++ b/frame/3/gemm/bli_gemm_blk_var2f.c @@ -130,13 +130,9 @@ void bli_gemm_blk_var2f( obj_t* a, // Unpack C1 (if C1 was packed). // Currently must be done by 1 thread - if( thread_am_ichief( thread ) ) { - bli_unpackm_int( c1_pack, &c1, - cntl_sub_unpackm_c( cntl ) ); - } - //Barrier to make sure unpacking is done before next iteration's packing of C - //Somehow, we'd like to make this a noop if packing isn't done. - thread_ibarrier( thread ); + bli_unpackm_int( c1_pack, &c1, + cntl_sub_unpackm_c( cntl ), + gemm_thread_sub_ipackm( thread ) ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/3/gemm/bli_gemm_blk_var3f.c b/frame/3/gemm/bli_gemm_blk_var3f.c index 3f723d43c..f1114daaf 100644 --- a/frame/3/gemm/bli_gemm_blk_var3f.c +++ b/frame/3/gemm/bli_gemm_blk_var3f.c @@ -139,14 +139,14 @@ void bli_gemm_blk_var3f( obj_t* a, thread_obarrier( thread ); // Unpack C (if C was packed). - if( thread_am_ochief( thread ) ){ - bli_unpackm_int( c_pack, c, - cntl_sub_unpackm_c( cntl ) ); - bli_obj_release_pack( c_pack ); - } + bli_unpackm_int( c_pack, c, + cntl_sub_unpackm_c( cntl ), + gemm_thread_sub_opackm( thread ) ); // If any packing buffers were acquired within packm, release them back // to the memory manager. + if( thread_am_ochief( thread ) ) + bli_obj_release_pack( c_pack ); if( thread_am_ichief( thread ) ){ bli_obj_release_pack( a1_pack ); bli_obj_release_pack( b1_pack ); diff --git a/frame/3/herk/bli_herk_blk_var1f.c b/frame/3/herk/bli_herk_blk_var1f.c index 880a06110..fa184e2b0 100644 --- a/frame/3/herk/bli_herk_blk_var1f.c +++ b/frame/3/herk/bli_herk_blk_var1f.c @@ -126,12 +126,9 @@ void bli_herk_blk_var1f( obj_t* a, herk_thread_sub_herk( thread ) ); // Unpack C1 (if C1 was packed). - // Currently must be done by 1 thread - if( thread_am_ichief( thread ) ) { - bli_unpackm_int( c1_pack, &c1, - cntl_sub_unpackm_c( cntl ) ); - } - thread_ibarrier( thread ); + bli_unpackm_int( c1_pack, &c1, + cntl_sub_unpackm_c( cntl ), + herk_thread_sub_ipackm( thread ) ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/3/herk/bli_herk_blk_var2f.c b/frame/3/herk/bli_herk_blk_var2f.c index 45b4d423a..8496b0852 100644 --- a/frame/3/herk/bli_herk_blk_var2f.c +++ b/frame/3/herk/bli_herk_blk_var2f.c @@ -142,11 +142,9 @@ void bli_herk_blk_var2f( obj_t* a, herk_thread_sub_herk( thread ) ); // Unpack C1 (if C1 was packed). - if( thread_am_ichief( thread ) ) { - bli_unpackm_int( c1S_pack, &c1S, - cntl_sub_unpackm_c( cntl ) ); - } - thread_ibarrier( thread ); + bli_unpackm_int( c1S_pack, &c1S, + cntl_sub_unpackm_c( cntl ), + herk_thread_sub_ipackm( thread ) ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/3/herk/bli_herk_blk_var3f.c b/frame/3/herk/bli_herk_blk_var3f.c index 96e9da471..943109156 100644 --- a/frame/3/herk/bli_herk_blk_var3f.c +++ b/frame/3/herk/bli_herk_blk_var3f.c @@ -136,14 +136,15 @@ void bli_herk_blk_var3f( obj_t* a, thread_obarrier( thread ); // Unpack C (if C was packed). - if( thread_am_ochief( thread ) ) { - bli_unpackm_int( c_pack, c, - cntl_sub_unpackm_c( cntl ) ); - bli_obj_release_pack( c_pack ); - } + bli_unpackm_int( c_pack, c, + cntl_sub_unpackm_c( cntl ), + herk_thread_sub_opackm( thread ) ); // If any packing buffers were acquired within packm, release them back // to the memory manager. + if( thread_am_ochief( thread ) ) { + bli_obj_release_pack( c_pack ); + } if( thread_am_ichief( thread ) ) { bli_obj_release_pack( a1_pack ); bli_obj_release_pack( ah1_pack ); diff --git a/frame/3/trmm/bli_trmm_blk_var1f.c b/frame/3/trmm/bli_trmm_blk_var1f.c index 4d4e87ade..fff6cc7fc 100644 --- a/frame/3/trmm/bli_trmm_blk_var1f.c +++ b/frame/3/trmm/bli_trmm_blk_var1f.c @@ -137,14 +137,9 @@ void bli_trmm_blk_var1f( obj_t* a, trmm_thread_sub_trmm( thread ) ); // Unpack C1 (if C1 was packed). - // Currently must be done by 1 thread - if( thread_am_ichief( thread ) ) { - bli_unpackm_int( c1_pack, &c1, - cntl_sub_unpackm_c( cntl ) ); - } - //Barrier to make sure unpacking is done before next iteration's packing of C - //Somehow, we'd like to make this a noop if packing isn't done. - thread_ibarrier( thread ); + bli_unpackm_int( c1_pack, &c1, + cntl_sub_unpackm_c( cntl ), + trmm_thread_sub_ipackm( thread ) ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/3/trmm/bli_trmm_blk_var2b.c b/frame/3/trmm/bli_trmm_blk_var2b.c index 18c580fa9..25f07d031 100644 --- a/frame/3/trmm/bli_trmm_blk_var2b.c +++ b/frame/3/trmm/bli_trmm_blk_var2b.c @@ -83,7 +83,7 @@ void bli_trmm_blk_var2b( obj_t* a, n_trans = bli_obj_width_after_trans( *b ); dim_t start, end; //bli_get_range( thread, 0, n_trans, 8, &start, &end ); - bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NC_D, 0, &start, &end ); + bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NC_D, bli_obj_is_upper( *c ), &start, &end ); // Partition along the n dimension. for ( i = start; i < end; i += b_alg ) @@ -127,14 +127,9 @@ void bli_trmm_blk_var2b( obj_t* a, trmm_thread_sub_trmm( thread ) ); // Unpack C1 (if C1 was packed). - // Currently must be done by 1 thread - if( thread_am_ichief( thread ) ) { - bli_unpackm_int( c1_pack, &c1, - cntl_sub_unpackm_c( cntl ) ); - } - //Barrier to make sure unpacking is done before next iteration's packing of C - //Somehow, we'd like to make this a noop if packing isn't done. - thread_ibarrier( thread ); + bli_unpackm_int( c1_pack, &c1, + cntl_sub_unpackm_c( cntl ), + trmm_thread_sub_ipackm( thread ) ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/3/trmm/bli_trmm_blk_var2f.c b/frame/3/trmm/bli_trmm_blk_var2f.c index 68cd11033..0077ea9cb 100644 --- a/frame/3/trmm/bli_trmm_blk_var2f.c +++ b/frame/3/trmm/bli_trmm_blk_var2f.c @@ -83,7 +83,7 @@ void bli_trmm_blk_var2f( obj_t* a, n_trans = bli_obj_width_after_trans( *b ); dim_t start, end; //bli_get_range( thread, 0, n_trans, 8, &start, &end ); - bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NC_D, 1, &start, &end ); + bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NC_D, bli_obj_is_lower( *c ), &start, &end ); // Partition along the n dimension. for ( i = start; i < end; i += b_alg ) @@ -127,14 +127,9 @@ void bli_trmm_blk_var2f( obj_t* a, trmm_thread_sub_trmm( thread ) ); // Unpack C1 (if C1 was packed). - // Currently must be done by 1 thread - if( thread_am_ichief( thread ) ) { - bli_unpackm_int( c1_pack, &c1, - cntl_sub_unpackm_c( cntl ) ); - } - //Barrier to make sure unpacking is done before next iteration's packing of C - //Somehow, we'd like to make this a noop if packing isn't done. - thread_ibarrier( thread ); + bli_unpackm_int( c1_pack, &c1, + cntl_sub_unpackm_c( cntl ), + trmm_thread_sub_ipackm( thread ) ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/3/trmm/bli_trmm_blk_var3b.c b/frame/3/trmm/bli_trmm_blk_var3b.c index f2ccd38a6..6a1191936 100644 --- a/frame/3/trmm/bli_trmm_blk_var3b.c +++ b/frame/3/trmm/bli_trmm_blk_var3b.c @@ -126,14 +126,15 @@ void bli_trmm_blk_var3b( obj_t* a, thread_obarrier( thread ); // Unpack C (if C was packed). - if( thread_am_ochief( thread ) ){ - bli_unpackm_int( c_pack, c, - cntl_sub_unpackm_c( cntl ) ); - bli_obj_release_pack( c_pack ); - } + bli_unpackm_int( c_pack, c, + cntl_sub_unpackm_c( cntl ), + trmm_thread_sub_opackm( thread ) ); // If any packing buffers were acquired within packm, release them back // to the memory manager. + if( thread_am_ochief( thread ) ){ + bli_obj_release_pack( c_pack ); + } if( thread_am_ichief( thread ) ){ bli_obj_release_pack( a1_pack ); bli_obj_release_pack( b1_pack ); diff --git a/frame/3/trmm/bli_trmm_blk_var3f.c b/frame/3/trmm/bli_trmm_blk_var3f.c index c361d6b23..67a4aa880 100644 --- a/frame/3/trmm/bli_trmm_blk_var3f.c +++ b/frame/3/trmm/bli_trmm_blk_var3f.c @@ -126,14 +126,15 @@ void bli_trmm_blk_var3f( obj_t* a, thread_obarrier( thread ); // Unpack C (if C was packed). - if( thread_am_ochief( thread ) ){ - bli_unpackm_int( c_pack, c, - cntl_sub_unpackm_c( cntl ) ); - bli_obj_release_pack( c_pack ); - } + bli_unpackm_int( c_pack, c, + cntl_sub_unpackm_c( cntl ), + trmm_thread_sub_opackm( thread ) ); // If any packing buffers were acquired within packm, release them back // to the memory manager. + if( thread_am_ochief( thread ) ){ + bli_obj_release_pack( c_pack ); + } if( thread_am_ichief( thread ) ){ bli_obj_release_pack( a1_pack ); bli_obj_release_pack( b1_pack ); diff --git a/frame/3/trsm/bli_trsm_blk_var2b.c b/frame/3/trsm/bli_trsm_blk_var2b.c index d8f29513a..c4ad6e7bd 100644 --- a/frame/3/trsm/bli_trsm_blk_var2b.c +++ b/frame/3/trsm/bli_trsm_blk_var2b.c @@ -127,14 +127,9 @@ void bli_trsm_blk_var2b( obj_t* a, trsm_thread_sub_trsm( thread ) ); // Unpack C1 (if C1 was packed). - // Currently must be done by 1 thread - if( thread_am_ichief( thread ) ) { - bli_unpackm_int( c1_pack, &c1, - cntl_sub_unpackm_c( cntl ) ); - } - //Barrier to make sure unpacking is done before next iteration's packing of C - //Somehow, we'd like to make this a noop if packing isn't done. - thread_ibarrier( thread ); + bli_unpackm_int( c1_pack, &c1, + cntl_sub_unpackm_c( cntl ), + trsm_thread_sub_ipackm( thread ) ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/3/trsm/bli_trsm_blk_var2f.c b/frame/3/trsm/bli_trsm_blk_var2f.c index 038e035f9..54e165029 100644 --- a/frame/3/trsm/bli_trsm_blk_var2f.c +++ b/frame/3/trsm/bli_trsm_blk_var2f.c @@ -128,14 +128,9 @@ void bli_trsm_blk_var2f( obj_t* a, trsm_thread_sub_trsm( thread ) ); // Unpack C1 (if C1 was packed). - // Currently must be done by 1 thread - if( thread_am_ichief( thread ) ) { - bli_unpackm_int( c1_pack, &c1, - cntl_sub_unpackm_c( cntl ) ); - } - //Barrier to make sure unpacking is done before next iteration's packing of C - //Somehow, we'd like to make this a noop if packing isn't done. - thread_ibarrier( thread ); + bli_unpackm_int( c1_pack, &c1, + cntl_sub_unpackm_c( cntl ), + trsm_thread_sub_ipackm( thread ) ); } // If any packing buffers were acquired within packm, release them back diff --git a/frame/3/trsm/bli_trsm_blk_var3b.c b/frame/3/trsm/bli_trsm_blk_var3b.c index b43f9f0f8..dd6b2c0c7 100644 --- a/frame/3/trsm/bli_trsm_blk_var3b.c +++ b/frame/3/trsm/bli_trsm_blk_var3b.c @@ -137,14 +137,15 @@ void bli_trsm_blk_var3b( obj_t* a, thread_obarrier( thread ); // Unpack C (if C was packed). - if( thread_am_ochief( thread ) ) { - bli_unpackm_int( c_pack, c, - cntl_sub_unpackm_c( cntl ) ); - bli_obj_release_pack( c_pack ); - } + bli_unpackm_int( c_pack, c, + cntl_sub_unpackm_c( cntl ), + trsm_thread_sub_opackm( thread ) ); // If any packing buffers were acquired within packm, release them back // to the memory manager. + if( thread_am_ochief( thread ) ) { + bli_obj_release_pack( c_pack ); + } if( thread_am_ichief( thread ) ) { bli_obj_release_pack( a1_pack ); bli_obj_release_pack( b1_pack ); diff --git a/frame/3/trsm/bli_trsm_blk_var3f.c b/frame/3/trsm/bli_trsm_blk_var3f.c index 84ad3ed16..466fd4461 100644 --- a/frame/3/trsm/bli_trsm_blk_var3f.c +++ b/frame/3/trsm/bli_trsm_blk_var3f.c @@ -137,14 +137,15 @@ void bli_trsm_blk_var3f( obj_t* a, thread_obarrier( thread ); // Unpack C (if C was packed). - if( thread_am_ochief( thread ) ) { - bli_unpackm_int( c_pack, c, - cntl_sub_unpackm_c( cntl ) ); - bli_obj_release_pack( c_pack ); - } + bli_unpackm_int( c_pack, c, + cntl_sub_unpackm_c( cntl ), + trsm_thread_sub_opackm( thread ) ); // If any packing buffers were acquired within packm, release them back // to the memory manager. + if( thread_am_ochief( thread ) ) { + bli_obj_release_pack( c_pack ); + } if( thread_am_ichief( thread ) ) { bli_obj_release_pack( a1_pack ); bli_obj_release_pack( b1_pack ); diff --git a/frame/base/bli_threading.c b/frame/base/bli_threading.c index 1efd53480..4f77d75f5 100644 --- a/frame/base/bli_threading.c +++ b/frame/base/bli_threading.c @@ -35,7 +35,7 @@ #include "blis.h" #ifdef BLIS_TREE_BARRIER -barrier_t* bli_free_barrier_tree( barrier_t* barrier ) +void bli_free_barrier_tree( barrier_t* barrier ) { if( barrier == NULL ) return; diff --git a/frame/base/bli_threading.h b/frame/base/bli_threading.h index 0ca6fdf4f..aa8bd8152 100644 --- a/frame/base/bli_threading.h +++ b/frame/base/bli_threading.h @@ -34,9 +34,6 @@ #ifndef BLIS_THREADING_H #define BLIS_THREADING_H -#define BLIS_TREE_BARRIER -#define BLIS_TREE_BARRIER_ARITY 4 - #ifdef BLIS_TREE_BARRIER struct barrier_s {