mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Made barrier after packing implicit.
This also fixed a bug where barriers in the blocked variants were inserted after the inner packing routines but not after the outer packing routines. This allowed, for instance, computation to begin before the block of B had finished being packed.
This commit is contained in:
@@ -123,5 +123,8 @@ void bli_packm_int( obj_t* a,
|
||||
f( a,
|
||||
p,
|
||||
thread );
|
||||
|
||||
// Barrier so that packing is done before computation
|
||||
thread_obarrier( thread );
|
||||
}
|
||||
|
||||
|
||||
@@ -119,9 +119,6 @@ void bli_gemm_blk_var1f( obj_t* a,
|
||||
bli_packm_int( &c1, c1_pack,
|
||||
cntl_sub_packm_c( cntl ),
|
||||
gemm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be done before computation.
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform gemm subproblem.
|
||||
bli_gemm_int( &BLIS_ONE,
|
||||
|
||||
@@ -119,9 +119,6 @@ void bli_gemm_blk_var2f( obj_t* a,
|
||||
cntl_sub_packm_c( cntl ),
|
||||
gemm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be done before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform gemm subproblem.
|
||||
bli_gemm_int( &BLIS_ONE,
|
||||
a_pack,
|
||||
|
||||
@@ -115,9 +115,6 @@ void bli_gemm_blk_var3f( obj_t* a,
|
||||
bli_packm_int( &b1, b1_pack,
|
||||
cntl_sub_packm_b( cntl ),
|
||||
gemm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be done before computation.
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform gemm subproblem.
|
||||
bli_gemm_int( &BLIS_ONE,
|
||||
|
||||
@@ -116,9 +116,6 @@ void bli_herk_blk_var1f( obj_t* a,
|
||||
cntl_sub_packm_c( cntl ),
|
||||
herk_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be done before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform herk subproblem.
|
||||
bli_herk_int( &BLIS_ONE,
|
||||
a1_pack,
|
||||
|
||||
@@ -120,9 +120,6 @@ void bli_herk_blk_var3f( obj_t* a,
|
||||
// internal beta scalar with BLIS_ONE once it has been used in the
|
||||
// first iteration.
|
||||
if ( i != 0 && thread_am_ichief( thread ) ) bli_obj_scalar_reset( c_pack );
|
||||
|
||||
// Packing must be done before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform herk subproblem.
|
||||
bli_herk_int( &BLIS_ONE,
|
||||
|
||||
@@ -128,9 +128,6 @@ void bli_trmm_blk_var1f( obj_t* a,
|
||||
cntl_sub_packm_c( cntl ),
|
||||
trmm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be finished before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform trmm subproblem.
|
||||
bli_trmm_int( &BLIS_ONE,
|
||||
a1_pack,
|
||||
|
||||
@@ -116,9 +116,6 @@ void bli_trmm_blk_var2b( obj_t* a,
|
||||
bli_packm_int( &c1, c1_pack,
|
||||
cntl_sub_packm_c( cntl ),
|
||||
trmm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be finished before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform trmm subproblem.
|
||||
bli_trmm_int( &BLIS_ONE,
|
||||
|
||||
@@ -116,9 +116,6 @@ void bli_trmm_blk_var2f( obj_t* a,
|
||||
bli_packm_int( &c1, c1_pack,
|
||||
cntl_sub_packm_c( cntl ),
|
||||
trmm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be finished before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform trmm subproblem.
|
||||
bli_trmm_int( &BLIS_ONE,
|
||||
|
||||
@@ -113,9 +113,6 @@ void bli_trmm_blk_var3b( obj_t* a,
|
||||
cntl_sub_packm_b( cntl ),
|
||||
trmm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be done before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform trmm subproblem.
|
||||
bli_trmm_int( &BLIS_ONE,
|
||||
a1_pack,
|
||||
|
||||
@@ -113,9 +113,6 @@ void bli_trmm_blk_var3f( obj_t* a,
|
||||
cntl_sub_packm_b( cntl ),
|
||||
trmm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be done before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform trmm subproblem.
|
||||
bli_trmm_int( &BLIS_ONE,
|
||||
a1_pack,
|
||||
|
||||
@@ -117,9 +117,6 @@ void bli_trsm_blk_var2b( obj_t* a,
|
||||
cntl_sub_packm_c( cntl ),
|
||||
trsm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be done before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform trsm subproblem.
|
||||
bli_trsm_int( &BLIS_ONE,
|
||||
a_pack,
|
||||
|
||||
@@ -118,9 +118,6 @@ void bli_trsm_blk_var2f( obj_t* a,
|
||||
cntl_sub_packm_c( cntl ),
|
||||
trsm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be done before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform trsm subproblem.
|
||||
bli_trsm_int( &BLIS_ONE,
|
||||
a_pack,
|
||||
|
||||
@@ -114,9 +114,6 @@ void bli_trsm_blk_var3b( obj_t* a,
|
||||
cntl_sub_packm_b( cntl ),
|
||||
trsm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be done before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform trsm subproblem.
|
||||
bli_trsm_int( &BLIS_ONE,
|
||||
a1_pack,
|
||||
|
||||
@@ -114,9 +114,6 @@ void bli_trsm_blk_var3f( obj_t* a,
|
||||
cntl_sub_packm_b( cntl ),
|
||||
trsm_thread_sub_ipackm( thread ) );
|
||||
|
||||
// Packing must be done before computation
|
||||
thread_ibarrier( thread );
|
||||
|
||||
// Perform trsm subproblem.
|
||||
bli_trsm_int( &BLIS_ONE,
|
||||
a1_pack,
|
||||
|
||||
Reference in New Issue
Block a user