From 459dde4acc09e49380da58fb7b246db488884ad9 Mon Sep 17 00:00:00 2001 From: Tyler Smith Date: Thu, 27 Mar 2014 17:06:45 -0500 Subject: [PATCH] Made barrier after packing implicit. This also fixed a bug where barriers in the blocked variants were inserted after the inner packing routines, but not the outer packing routines. This allowed, for instance, computation to begin before the block of B had finished being packed. --- frame/1m/packm/bli_packm_int.c | 3 +++ frame/3/gemm/bli_gemm_blk_var1f.c | 3 --- frame/3/gemm/bli_gemm_blk_var2f.c | 3 --- frame/3/gemm/bli_gemm_blk_var3f.c | 3 --- frame/3/herk/bli_herk_blk_var1f.c | 3 --- frame/3/herk/bli_herk_blk_var3f.c | 3 --- frame/3/trmm/bli_trmm_blk_var1f.c | 3 --- frame/3/trmm/bli_trmm_blk_var2b.c | 3 --- frame/3/trmm/bli_trmm_blk_var2f.c | 3 --- frame/3/trmm/bli_trmm_blk_var3b.c | 3 --- frame/3/trmm/bli_trmm_blk_var3f.c | 3 --- frame/3/trsm/bli_trsm_blk_var2b.c | 3 --- frame/3/trsm/bli_trsm_blk_var2f.c | 3 --- frame/3/trsm/bli_trsm_blk_var3b.c | 3 --- frame/3/trsm/bli_trsm_blk_var3f.c | 3 --- 15 files changed, 3 insertions(+), 42 deletions(-) diff --git a/frame/1m/packm/bli_packm_int.c b/frame/1m/packm/bli_packm_int.c index 2a7dc991c..a3d89b679 100644 --- a/frame/1m/packm/bli_packm_int.c +++ b/frame/1m/packm/bli_packm_int.c @@ -123,5 +123,8 @@ void bli_packm_int( obj_t* a, f( a, p, thread ); + + // Barrier so that packing is done before computation + thread_obarrier( thread ); } diff --git a/frame/3/gemm/bli_gemm_blk_var1f.c b/frame/3/gemm/bli_gemm_blk_var1f.c index c3e5db6c0..2ba71c536 100644 --- a/frame/3/gemm/bli_gemm_blk_var1f.c +++ b/frame/3/gemm/bli_gemm_blk_var1f.c @@ -119,9 +119,6 @@ void bli_gemm_blk_var1f( obj_t* a, bli_packm_int( &c1, c1_pack, cntl_sub_packm_c( cntl ), gemm_thread_sub_ipackm( thread ) ); - - // Packing must be done before computation. - thread_ibarrier( thread ); // Perform gemm subproblem. 
bli_gemm_int( &BLIS_ONE, diff --git a/frame/3/gemm/bli_gemm_blk_var2f.c b/frame/3/gemm/bli_gemm_blk_var2f.c index 82aad8b3d..cab440c55 100644 --- a/frame/3/gemm/bli_gemm_blk_var2f.c +++ b/frame/3/gemm/bli_gemm_blk_var2f.c @@ -119,9 +119,6 @@ void bli_gemm_blk_var2f( obj_t* a, cntl_sub_packm_c( cntl ), gemm_thread_sub_ipackm( thread ) ); - // Packing must be done before computation - thread_ibarrier( thread ); - // Perform gemm subproblem. bli_gemm_int( &BLIS_ONE, a_pack, diff --git a/frame/3/gemm/bli_gemm_blk_var3f.c b/frame/3/gemm/bli_gemm_blk_var3f.c index a6a70181b..c5af97f94 100644 --- a/frame/3/gemm/bli_gemm_blk_var3f.c +++ b/frame/3/gemm/bli_gemm_blk_var3f.c @@ -115,9 +115,6 @@ void bli_gemm_blk_var3f( obj_t* a, bli_packm_int( &b1, b1_pack, cntl_sub_packm_b( cntl ), gemm_thread_sub_ipackm( thread ) ); - - // Packing must be done before computation. - thread_ibarrier( thread ); // Perform gemm subproblem. bli_gemm_int( &BLIS_ONE, diff --git a/frame/3/herk/bli_herk_blk_var1f.c b/frame/3/herk/bli_herk_blk_var1f.c index 899aa194c..7d4fa4375 100644 --- a/frame/3/herk/bli_herk_blk_var1f.c +++ b/frame/3/herk/bli_herk_blk_var1f.c @@ -116,9 +116,6 @@ void bli_herk_blk_var1f( obj_t* a, cntl_sub_packm_c( cntl ), herk_thread_sub_ipackm( thread ) ); - // Packing must be done before computation - thread_ibarrier( thread ); - // Perform herk subproblem. bli_herk_int( &BLIS_ONE, a1_pack, diff --git a/frame/3/herk/bli_herk_blk_var3f.c b/frame/3/herk/bli_herk_blk_var3f.c index 78e3cd30e..4c86adc8e 100644 --- a/frame/3/herk/bli_herk_blk_var3f.c +++ b/frame/3/herk/bli_herk_blk_var3f.c @@ -120,9 +120,6 @@ void bli_herk_blk_var3f( obj_t* a, // internal beta scalar with BLIS_ONE once it has been used in the // first iteration. if ( i != 0 && thread_am_ichief( thread ) ) bli_obj_scalar_reset( c_pack ); - - // Packing must be done before computation - thread_ibarrier( thread ); // Perform herk subproblem. 
bli_herk_int( &BLIS_ONE, diff --git a/frame/3/trmm/bli_trmm_blk_var1f.c b/frame/3/trmm/bli_trmm_blk_var1f.c index ac1973366..e71ec01b6 100644 --- a/frame/3/trmm/bli_trmm_blk_var1f.c +++ b/frame/3/trmm/bli_trmm_blk_var1f.c @@ -128,9 +128,6 @@ void bli_trmm_blk_var1f( obj_t* a, cntl_sub_packm_c( cntl ), trmm_thread_sub_ipackm( thread ) ); - // Packing must be finished before computation - thread_ibarrier( thread ); - // Perform trmm subproblem. bli_trmm_int( &BLIS_ONE, a1_pack, diff --git a/frame/3/trmm/bli_trmm_blk_var2b.c b/frame/3/trmm/bli_trmm_blk_var2b.c index 86787a80a..dae4f04e7 100644 --- a/frame/3/trmm/bli_trmm_blk_var2b.c +++ b/frame/3/trmm/bli_trmm_blk_var2b.c @@ -116,9 +116,6 @@ void bli_trmm_blk_var2b( obj_t* a, bli_packm_int( &c1, c1_pack, cntl_sub_packm_c( cntl ), trmm_thread_sub_ipackm( thread ) ); - - // Packing must be finished before computation - thread_ibarrier( thread ); // Perform trmm subproblem. bli_trmm_int( &BLIS_ONE, diff --git a/frame/3/trmm/bli_trmm_blk_var2f.c b/frame/3/trmm/bli_trmm_blk_var2f.c index 39033fcf3..d0959fc6e 100644 --- a/frame/3/trmm/bli_trmm_blk_var2f.c +++ b/frame/3/trmm/bli_trmm_blk_var2f.c @@ -116,9 +116,6 @@ void bli_trmm_blk_var2f( obj_t* a, bli_packm_int( &c1, c1_pack, cntl_sub_packm_c( cntl ), trmm_thread_sub_ipackm( thread ) ); - - // Packing must be finished before computation - thread_ibarrier( thread ); // Perform trmm subproblem. bli_trmm_int( &BLIS_ONE, diff --git a/frame/3/trmm/bli_trmm_blk_var3b.c b/frame/3/trmm/bli_trmm_blk_var3b.c index 40e9e21d6..f2ccd38a6 100644 --- a/frame/3/trmm/bli_trmm_blk_var3b.c +++ b/frame/3/trmm/bli_trmm_blk_var3b.c @@ -113,9 +113,6 @@ void bli_trmm_blk_var3b( obj_t* a, cntl_sub_packm_b( cntl ), trmm_thread_sub_ipackm( thread ) ); - // Packing must be done before computation - thread_ibarrier( thread ); - // Perform trmm subproblem. 
bli_trmm_int( &BLIS_ONE, a1_pack, diff --git a/frame/3/trmm/bli_trmm_blk_var3f.c b/frame/3/trmm/bli_trmm_blk_var3f.c index 80293e42f..c361d6b23 100644 --- a/frame/3/trmm/bli_trmm_blk_var3f.c +++ b/frame/3/trmm/bli_trmm_blk_var3f.c @@ -113,9 +113,6 @@ void bli_trmm_blk_var3f( obj_t* a, cntl_sub_packm_b( cntl ), trmm_thread_sub_ipackm( thread ) ); - // Packing must be done before computation - thread_ibarrier( thread ); - // Perform trmm subproblem. bli_trmm_int( &BLIS_ONE, a1_pack, diff --git a/frame/3/trsm/bli_trsm_blk_var2b.c b/frame/3/trsm/bli_trsm_blk_var2b.c index 435c9dec3..eadda1c37 100644 --- a/frame/3/trsm/bli_trsm_blk_var2b.c +++ b/frame/3/trsm/bli_trsm_blk_var2b.c @@ -117,9 +117,6 @@ void bli_trsm_blk_var2b( obj_t* a, cntl_sub_packm_c( cntl ), trsm_thread_sub_ipackm( thread ) ); - // Packing must be done before computation - thread_ibarrier( thread ); - // Perform trsm subproblem. bli_trsm_int( &BLIS_ONE, a_pack, diff --git a/frame/3/trsm/bli_trsm_blk_var2f.c b/frame/3/trsm/bli_trsm_blk_var2f.c index 43b46b752..e81875a4d 100644 --- a/frame/3/trsm/bli_trsm_blk_var2f.c +++ b/frame/3/trsm/bli_trsm_blk_var2f.c @@ -118,9 +118,6 @@ void bli_trsm_blk_var2f( obj_t* a, cntl_sub_packm_c( cntl ), trsm_thread_sub_ipackm( thread ) ); - // Packing must be done before computation - thread_ibarrier( thread ); - // Perform trsm subproblem. bli_trsm_int( &BLIS_ONE, a_pack, diff --git a/frame/3/trsm/bli_trsm_blk_var3b.c b/frame/3/trsm/bli_trsm_blk_var3b.c index 3e586cdfc..680353d68 100644 --- a/frame/3/trsm/bli_trsm_blk_var3b.c +++ b/frame/3/trsm/bli_trsm_blk_var3b.c @@ -114,9 +114,6 @@ void bli_trsm_blk_var3b( obj_t* a, cntl_sub_packm_b( cntl ), trsm_thread_sub_ipackm( thread ) ); - // Packing must be done before computation - thread_ibarrier( thread ); - // Perform trsm subproblem. 
bli_trsm_int( &BLIS_ONE, a1_pack, diff --git a/frame/3/trsm/bli_trsm_blk_var3f.c b/frame/3/trsm/bli_trsm_blk_var3f.c index 2a3384a2b..80d15b477 100644 --- a/frame/3/trsm/bli_trsm_blk_var3f.c +++ b/frame/3/trsm/bli_trsm_blk_var3f.c @@ -114,9 +114,6 @@ void bli_trsm_blk_var3f( obj_t* a, cntl_sub_packm_b( cntl ), trsm_thread_sub_ipackm( thread ) ); - // Packing must be done before computation - thread_ibarrier( thread ); - // Perform trsm subproblem. bli_trsm_int( &BLIS_ONE, a1_pack,