Bunch of minor fixes

Removed the barrier after unpackm in all level-3 blocked variants.
There is now an implicit barrier inside unpackm that only occurs if C is packed (which is usually not the case).

Moved the enabling of the tree barriers into bli_config.h.
Passed the default MR and NR for double precision to bli_get_range instead of the hard-coded value 8.
This commit is contained in:
Tyler Smith
2014-04-04 15:09:10 -05:00
parent 575fb9b0b0
commit 5ec93bd9a7
30 changed files with 114 additions and 130 deletions

View File

@@ -36,6 +36,9 @@
#define BLIS_CONFIG_H
#define BLIS_TREE_BARRIER
#define BLIS_TREE_BARRIER_ARITY 4
// -- OPERATING SYSTEM ---------------------------------------------------------

View File

@@ -56,7 +56,8 @@ static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1);
void bli_packm_unb_var1( obj_t* c,
obj_t* p )
obj_t* p,
packm_thrinfo_t* thread )
{
num_t dt_cp = bli_obj_datatype( *c );
@@ -98,20 +99,22 @@ void bli_packm_unb_var1( obj_t* c,
// function pointer.
f = ftypes[dt_cp];
// Invoke the function.
f( strucc,
diagoffc,
diagc,
uploc,
transc,
densify,
m_p,
n_p,
m_max_p,
n_max_p,
buf_kappa,
buf_c, rs_c, cs_c,
buf_p, rs_p, cs_p );
if( thread_am_ochief( thread ) ) {
// Invoke the function.
f( strucc,
diagoffc,
diagc,
uploc,
transc,
densify,
m_p,
n_p,
m_max_p,
n_max_p,
buf_kappa,
buf_c, rs_c, cs_c,
buf_p, rs_p, cs_p );
}
}

View File

@@ -33,7 +33,8 @@
*/
void bli_packm_unb_var1( obj_t* c,
obj_t* p );
obj_t* p,
packm_thrinfo_t* thread );
#undef GENTPROT

View File

@@ -49,7 +49,8 @@ static FUNCPTR_T vars[2][3] =
void bli_unpackm_int( obj_t* p,
obj_t* a,
unpackm_t* cntl )
unpackm_t* cntl,
packm_thrinfo_t* thread )
{
// The unpackm operation consists of an optional post-process: castm.
// (This post-process is analogous to the castm pre-process in packm.)
@@ -122,9 +123,12 @@ void bli_unpackm_int( obj_t* p,
f = vars[n][i];
// Invoke the variant.
f( p,
&c,
cntl );
if( thread_am_ochief( thread ) ) {
f( p,
&c,
cntl );
}
thread_obarrier( thread );
// Now, if necessary, we cast the contents of c to matrix a. If casting
// was not necessary, then we are done because the call to the unpackm

View File

@@ -34,7 +34,8 @@
void bli_unpackm_int( obj_t* p,
obj_t* a,
unpackm_t* cntl );
unpackm_t* cntl,
packm_thrinfo_t* thread );
/*
void bli_unpackm_init_cast( obj_t* p,

View File

@@ -91,7 +91,8 @@ void bli_ger_blk_var1( obj_t* alpha,
// Copy/unpack A1 (if A1 was packed).
bli_unpackm_int( &a1_pack, &a1,
cntl_sub_unpackm_a( cntl ) );
cntl_sub_unpackm_a( cntl ),
&BLIS_PACKM_SINGLE_THREADED );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -91,7 +91,8 @@ void bli_ger_blk_var2( obj_t* alpha,
// Copy/unpack A1 (if A1 was packed).
bli_unpackm_int( &a1_pack, &a1,
cntl_sub_unpackm_a( cntl ) );
cntl_sub_unpackm_a( cntl ),
&BLIS_PACKM_SINGLE_THREADED );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -113,7 +113,8 @@ void bli_her_blk_var1( conj_t conjh,
// Copy/unpack C11 (if C11 was packed).
bli_unpackm_int( &c11_pack, &c11,
cntl_sub_unpackm_c11( cntl ) );
cntl_sub_unpackm_c11( cntl ),
&BLIS_PACKM_SINGLE_THREADED );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -113,7 +113,8 @@ void bli_her_blk_var2( conj_t conjh,
// Copy/unpack C11 (if C11 was packed).
bli_unpackm_int( &c11_pack, &c11,
cntl_sub_unpackm_c11( cntl ) );
cntl_sub_unpackm_c11( cntl ),
&BLIS_PACKM_SINGLE_THREADED );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -137,7 +137,8 @@ void bli_her2_blk_var1( conj_t conjh,
// Copy/unpack C11 (if C11 was packed).
bli_unpackm_int( &c11_pack, &c11,
cntl_sub_unpackm_c11( cntl ) );
cntl_sub_unpackm_c11( cntl ),
&BLIS_PACKM_SINGLE_THREADED );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -140,7 +140,8 @@ void bli_her2_blk_var2( conj_t conjh,
// Copy/unpack C11 (if C11 was packed).
bli_unpackm_int( &c11_pack, &c11,
cntl_sub_unpackm_c11( cntl ) );
cntl_sub_unpackm_c11( cntl ),
&BLIS_PACKM_SINGLE_THREADED );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -140,7 +140,8 @@ void bli_her2_blk_var3( conj_t conjh,
// Copy/unpack C11 (if C11 was packed).
bli_unpackm_int( &c11_pack, &c11,
cntl_sub_unpackm_c11( cntl ) );
cntl_sub_unpackm_c11( cntl ),
&BLIS_PACKM_SINGLE_THREADED );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -137,7 +137,8 @@ void bli_her2_blk_var4( conj_t conjh,
// Copy/unpack C11 (if C11 was packed).
bli_unpackm_int( &c11_pack, &c11,
cntl_sub_unpackm_c11( cntl ) );
cntl_sub_unpackm_c11( cntl ),
&BLIS_PACKM_SINGLE_THREADED );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -131,13 +131,9 @@ void bli_gemm_blk_var1f( obj_t* a,
// Unpack C1 (if C1 was packed).
// Currently must be done by 1 thread
if( thread_am_ichief( thread ) ) {
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ) );
}
//Barrier to make sure unpacking is done before next iteration's packing of C
//Somehow, we'd like to make this a noop if packing isn't done.
thread_ibarrier( thread );
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ),
gemm_thread_sub_ipackm( thread ) );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -130,13 +130,9 @@ void bli_gemm_blk_var2f( obj_t* a,
// Unpack C1 (if C1 was packed).
// Currently must be done by 1 thread
if( thread_am_ichief( thread ) ) {
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ) );
}
//Barrier to make sure unpacking is done before next iteration's packing of C
//Somehow, we'd like to make this a noop if packing isn't done.
thread_ibarrier( thread );
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ),
gemm_thread_sub_ipackm( thread ) );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -139,14 +139,14 @@ void bli_gemm_blk_var3f( obj_t* a,
thread_obarrier( thread );
// Unpack C (if C was packed).
if( thread_am_ochief( thread ) ){
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ) );
bli_obj_release_pack( c_pack );
}
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ),
gemm_thread_sub_opackm( thread ) );
// If any packing buffers were acquired within packm, release them back
// to the memory manager.
if( thread_am_ochief( thread ) )
bli_obj_release_pack( c_pack );
if( thread_am_ichief( thread ) ){
bli_obj_release_pack( a1_pack );
bli_obj_release_pack( b1_pack );

View File

@@ -126,12 +126,9 @@ void bli_herk_blk_var1f( obj_t* a,
herk_thread_sub_herk( thread ) );
// Unpack C1 (if C1 was packed).
// Currently must be done by 1 thread
if( thread_am_ichief( thread ) ) {
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ) );
}
thread_ibarrier( thread );
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ),
herk_thread_sub_ipackm( thread ) );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -142,11 +142,9 @@ void bli_herk_blk_var2f( obj_t* a,
herk_thread_sub_herk( thread ) );
// Unpack C1 (if C1 was packed).
if( thread_am_ichief( thread ) ) {
bli_unpackm_int( c1S_pack, &c1S,
cntl_sub_unpackm_c( cntl ) );
}
thread_ibarrier( thread );
bli_unpackm_int( c1S_pack, &c1S,
cntl_sub_unpackm_c( cntl ),
herk_thread_sub_ipackm( thread ) );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -136,14 +136,15 @@ void bli_herk_blk_var3f( obj_t* a,
thread_obarrier( thread );
// Unpack C (if C was packed).
if( thread_am_ochief( thread ) ) {
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ) );
bli_obj_release_pack( c_pack );
}
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ),
herk_thread_sub_opackm( thread ) );
// If any packing buffers were acquired within packm, release them back
// to the memory manager.
if( thread_am_ochief( thread ) ) {
bli_obj_release_pack( c_pack );
}
if( thread_am_ichief( thread ) ) {
bli_obj_release_pack( a1_pack );
bli_obj_release_pack( ah1_pack );

View File

@@ -137,14 +137,9 @@ void bli_trmm_blk_var1f( obj_t* a,
trmm_thread_sub_trmm( thread ) );
// Unpack C1 (if C1 was packed).
// Currently must be done by 1 thread
if( thread_am_ichief( thread ) ) {
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ) );
}
//Barrier to make sure unpacking is done before next iteration's packing of C
//Somehow, we'd like to make this a noop if packing isn't done.
thread_ibarrier( thread );
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ),
trmm_thread_sub_ipackm( thread ) );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -83,7 +83,7 @@ void bli_trmm_blk_var2b( obj_t* a,
n_trans = bli_obj_width_after_trans( *b );
dim_t start, end;
//bli_get_range( thread, 0, n_trans, 8, &start, &end );
bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NC_D, 0, &start, &end );
bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NC_D, bli_obj_is_upper( *c ), &start, &end );
// Partition along the n dimension.
for ( i = start; i < end; i += b_alg )
@@ -127,14 +127,9 @@ void bli_trmm_blk_var2b( obj_t* a,
trmm_thread_sub_trmm( thread ) );
// Unpack C1 (if C1 was packed).
// Currently must be done by 1 thread
if( thread_am_ichief( thread ) ) {
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ) );
}
//Barrier to make sure unpacking is done before next iteration's packing of C
//Somehow, we'd like to make this a noop if packing isn't done.
thread_ibarrier( thread );
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ),
trmm_thread_sub_ipackm( thread ) );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -83,7 +83,7 @@ void bli_trmm_blk_var2f( obj_t* a,
n_trans = bli_obj_width_after_trans( *b );
dim_t start, end;
//bli_get_range( thread, 0, n_trans, 8, &start, &end );
bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NC_D, 1, &start, &end );
bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NC_D, bli_obj_is_lower( *c ), &start, &end );
// Partition along the n dimension.
for ( i = start; i < end; i += b_alg )
@@ -127,14 +127,9 @@ void bli_trmm_blk_var2f( obj_t* a,
trmm_thread_sub_trmm( thread ) );
// Unpack C1 (if C1 was packed).
// Currently must be done by 1 thread
if( thread_am_ichief( thread ) ) {
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ) );
}
//Barrier to make sure unpacking is done before next iteration's packing of C
//Somehow, we'd like to make this a noop if packing isn't done.
thread_ibarrier( thread );
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ),
trmm_thread_sub_ipackm( thread ) );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -126,14 +126,15 @@ void bli_trmm_blk_var3b( obj_t* a,
thread_obarrier( thread );
// Unpack C (if C was packed).
if( thread_am_ochief( thread ) ){
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ) );
bli_obj_release_pack( c_pack );
}
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ),
trmm_thread_sub_opackm( thread ) );
// If any packing buffers were acquired within packm, release them back
// to the memory manager.
if( thread_am_ochief( thread ) ){
bli_obj_release_pack( c_pack );
}
if( thread_am_ichief( thread ) ){
bli_obj_release_pack( a1_pack );
bli_obj_release_pack( b1_pack );

View File

@@ -126,14 +126,15 @@ void bli_trmm_blk_var3f( obj_t* a,
thread_obarrier( thread );
// Unpack C (if C was packed).
if( thread_am_ochief( thread ) ){
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ) );
bli_obj_release_pack( c_pack );
}
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ),
trmm_thread_sub_opackm( thread ) );
// If any packing buffers were acquired within packm, release them back
// to the memory manager.
if( thread_am_ochief( thread ) ){
bli_obj_release_pack( c_pack );
}
if( thread_am_ichief( thread ) ){
bli_obj_release_pack( a1_pack );
bli_obj_release_pack( b1_pack );

View File

@@ -127,14 +127,9 @@ void bli_trsm_blk_var2b( obj_t* a,
trsm_thread_sub_trsm( thread ) );
// Unpack C1 (if C1 was packed).
// Currently must be done by 1 thread
if( thread_am_ichief( thread ) ) {
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ) );
}
//Barrier to make sure unpacking is done before next iteration's packing of C
//Somehow, we'd like to make this a noop if packing isn't done.
thread_ibarrier( thread );
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ),
trsm_thread_sub_ipackm( thread ) );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -128,14 +128,9 @@ void bli_trsm_blk_var2f( obj_t* a,
trsm_thread_sub_trsm( thread ) );
// Unpack C1 (if C1 was packed).
// Currently must be done by 1 thread
if( thread_am_ichief( thread ) ) {
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ) );
}
//Barrier to make sure unpacking is done before next iteration's packing of C
//Somehow, we'd like to make this a noop if packing isn't done.
thread_ibarrier( thread );
bli_unpackm_int( c1_pack, &c1,
cntl_sub_unpackm_c( cntl ),
trsm_thread_sub_ipackm( thread ) );
}
// If any packing buffers were acquired within packm, release them back

View File

@@ -137,14 +137,15 @@ void bli_trsm_blk_var3b( obj_t* a,
thread_obarrier( thread );
// Unpack C (if C was packed).
if( thread_am_ochief( thread ) ) {
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ) );
bli_obj_release_pack( c_pack );
}
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ),
trsm_thread_sub_opackm( thread ) );
// If any packing buffers were acquired within packm, release them back
// to the memory manager.
if( thread_am_ochief( thread ) ) {
bli_obj_release_pack( c_pack );
}
if( thread_am_ichief( thread ) ) {
bli_obj_release_pack( a1_pack );
bli_obj_release_pack( b1_pack );

View File

@@ -137,14 +137,15 @@ void bli_trsm_blk_var3f( obj_t* a,
thread_obarrier( thread );
// Unpack C (if C was packed).
if( thread_am_ochief( thread ) ) {
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ) );
bli_obj_release_pack( c_pack );
}
bli_unpackm_int( c_pack, c,
cntl_sub_unpackm_c( cntl ),
trsm_thread_sub_opackm( thread ) );
// If any packing buffers were acquired within packm, release them back
// to the memory manager.
if( thread_am_ochief( thread ) ) {
bli_obj_release_pack( c_pack );
}
if( thread_am_ichief( thread ) ) {
bli_obj_release_pack( a1_pack );
bli_obj_release_pack( b1_pack );

View File

@@ -35,7 +35,7 @@
#include "blis.h"
#ifdef BLIS_TREE_BARRIER
barrier_t* bli_free_barrier_tree( barrier_t* barrier )
void bli_free_barrier_tree( barrier_t* barrier )
{
if( barrier == NULL )
return;

View File

@@ -34,9 +34,6 @@
#ifndef BLIS_THREADING_H
#define BLIS_THREADING_H
#define BLIS_TREE_BARRIER
#define BLIS_TREE_BARRIER_ARITY 4
#ifdef BLIS_TREE_BARRIER
struct barrier_s
{