Made barrier after packing implicit.

This also fixed a bug where barriers in the blocked variants were inserted after the inner packing routines,
but not the outer packing routines.
This allowed, for instance, computation to begin before the block of B had finished being packed.
This commit is contained in:
Tyler Smith
2014-03-27 17:06:45 -05:00
parent 9f78ec6e7e
commit 459dde4acc
15 changed files with 3 additions and 42 deletions

View File

@@ -123,5 +123,8 @@ void bli_packm_int( obj_t* a,
f( a,
p,
thread );
// Barrier so that packing is done before computation
thread_obarrier( thread );
}

View File

@@ -119,9 +119,6 @@ void bli_gemm_blk_var1f( obj_t* a,
bli_packm_int( &c1, c1_pack,
cntl_sub_packm_c( cntl ),
gemm_thread_sub_ipackm( thread ) );
// Packing must be done before computation.
thread_ibarrier( thread );
// Perform gemm subproblem.
bli_gemm_int( &BLIS_ONE,

View File

@@ -119,9 +119,6 @@ void bli_gemm_blk_var2f( obj_t* a,
cntl_sub_packm_c( cntl ),
gemm_thread_sub_ipackm( thread ) );
// Packing must be done before computation
thread_ibarrier( thread );
// Perform gemm subproblem.
bli_gemm_int( &BLIS_ONE,
a_pack,

View File

@@ -115,9 +115,6 @@ void bli_gemm_blk_var3f( obj_t* a,
bli_packm_int( &b1, b1_pack,
cntl_sub_packm_b( cntl ),
gemm_thread_sub_ipackm( thread ) );
// Packing must be done before computation.
thread_ibarrier( thread );
// Perform gemm subproblem.
bli_gemm_int( &BLIS_ONE,

View File

@@ -116,9 +116,6 @@ void bli_herk_blk_var1f( obj_t* a,
cntl_sub_packm_c( cntl ),
herk_thread_sub_ipackm( thread ) );
// Packing must be done before computation
thread_ibarrier( thread );
// Perform herk subproblem.
bli_herk_int( &BLIS_ONE,
a1_pack,

View File

@@ -120,9 +120,6 @@ void bli_herk_blk_var3f( obj_t* a,
// internal beta scalar with BLIS_ONE once it has been used in the
// first iteration.
if ( i != 0 && thread_am_ichief( thread ) ) bli_obj_scalar_reset( c_pack );
// Packing must be done before computation
thread_ibarrier( thread );
// Perform herk subproblem.
bli_herk_int( &BLIS_ONE,

View File

@@ -128,9 +128,6 @@ void bli_trmm_blk_var1f( obj_t* a,
cntl_sub_packm_c( cntl ),
trmm_thread_sub_ipackm( thread ) );
// Packing must be finished before computation
thread_ibarrier( thread );
// Perform trmm subproblem.
bli_trmm_int( &BLIS_ONE,
a1_pack,

View File

@@ -116,9 +116,6 @@ void bli_trmm_blk_var2b( obj_t* a,
bli_packm_int( &c1, c1_pack,
cntl_sub_packm_c( cntl ),
trmm_thread_sub_ipackm( thread ) );
// Packing must be finished before computation
thread_ibarrier( thread );
// Perform trmm subproblem.
bli_trmm_int( &BLIS_ONE,

View File

@@ -116,9 +116,6 @@ void bli_trmm_blk_var2f( obj_t* a,
bli_packm_int( &c1, c1_pack,
cntl_sub_packm_c( cntl ),
trmm_thread_sub_ipackm( thread ) );
// Packing must be finished before computation
thread_ibarrier( thread );
// Perform trmm subproblem.
bli_trmm_int( &BLIS_ONE,

View File

@@ -113,9 +113,6 @@ void bli_trmm_blk_var3b( obj_t* a,
cntl_sub_packm_b( cntl ),
trmm_thread_sub_ipackm( thread ) );
// Packing must be done before computation
thread_ibarrier( thread );
// Perform trmm subproblem.
bli_trmm_int( &BLIS_ONE,
a1_pack,

View File

@@ -113,9 +113,6 @@ void bli_trmm_blk_var3f( obj_t* a,
cntl_sub_packm_b( cntl ),
trmm_thread_sub_ipackm( thread ) );
// Packing must be done before computation
thread_ibarrier( thread );
// Perform trmm subproblem.
bli_trmm_int( &BLIS_ONE,
a1_pack,

View File

@@ -117,9 +117,6 @@ void bli_trsm_blk_var2b( obj_t* a,
cntl_sub_packm_c( cntl ),
trsm_thread_sub_ipackm( thread ) );
// Packing must be done before computation
thread_ibarrier( thread );
// Perform trsm subproblem.
bli_trsm_int( &BLIS_ONE,
a_pack,

View File

@@ -118,9 +118,6 @@ void bli_trsm_blk_var2f( obj_t* a,
cntl_sub_packm_c( cntl ),
trsm_thread_sub_ipackm( thread ) );
// Packing must be done before computation
thread_ibarrier( thread );
// Perform trsm subproblem.
bli_trsm_int( &BLIS_ONE,
a_pack,

View File

@@ -114,9 +114,6 @@ void bli_trsm_blk_var3b( obj_t* a,
cntl_sub_packm_b( cntl ),
trsm_thread_sub_ipackm( thread ) );
// Packing must be done before computation
thread_ibarrier( thread );
// Perform trsm subproblem.
bli_trsm_int( &BLIS_ONE,
a1_pack,

View File

@@ -114,9 +114,6 @@ void bli_trsm_blk_var3f( obj_t* a,
cntl_sub_packm_b( cntl ),
trsm_thread_sub_ipackm( thread ) );
// Packing must be done before computation
thread_ibarrier( thread );
// Perform trsm subproblem.
bli_trsm_int( &BLIS_ONE,
a1_pack,