Updated setting/querying pack schema (cntx->cntl).

- Query pack schemas in level-3 bli_*_front() functions and store those
  values in the schema bitfields of the corresponding obj_t's when the
  cntx's method is not BLIS_NAT. (When method is BLIS_NAT, the default
  native schemas are stored to the obj_t's.)
- In bli_l3_cntl_create_if(), query the schemas stored to the obj_t's in
  bli_*_front(), clear the schema bitfields, and pass the queried values
  into bli_gemm_cntl_create() and bli_trsm_cntl_create().
- Updated APIs for bli_gemm_cntl_create() and bli_trsm_cntl_create() to
  take schemas for A and B, and use these values to initialize the
  appropriate control tree nodes. (Also cpp-disabled the panel-block cntl
  tree creation variant, bli_gemmpb_cntl_create(), as it has not been
  employed by BLIS in quite some time.)
- Simplified querying of schema in bli_packm_init() thanks to above
  changes.
- Updated openmp and pthreads definitions of bli_l3_thread_decorator()
  so that thread-local aliases of matrix operands are guaranteed, even
  if aliasing is disabled within the internal back-end functions (e.g.
  bli_gemm_int.c). Also added a comment to bli_thrcomm_single.c
  explaining why the extra aliasing is not needed there.
- Change bli_gemm() and level-3 friends so that the operation's ind()
  function is called only if all matrix operands have the same datatype,
  and only if that datatype is complex. The former condition is needed
  in preparation for work related to mixed domain operands, while the
  latter helps with readability, especially for those who don't want to
  venture into frame/ind.
- Reshuffled arguments in bli_cntx_set_thrloop_from_env() to be
  consistent with BLIS calling conventions (modified argument(s) are
  last), and updated all invocations in the level-3 _front() functions.
- Comment updates to bli_cntx_set_thrloop_from_env().
This commit is contained in:
Field G. Van Zee
2018-06-02 20:28:20 -05:00
parent 965db85d29
commit f97a86f322
22 changed files with 503 additions and 137 deletions

View File

@@ -56,8 +56,8 @@ siz_t bli_packm_init
bool_t does_invert_diag; bool_t does_invert_diag;
bool_t rev_iter_if_upper; bool_t rev_iter_if_upper;
bool_t rev_iter_if_lower; bool_t rev_iter_if_lower;
//pack_t pack_schema; pack_t schema;
packbuf_t pack_buf_type; //packbuf_t pack_buf_type;
siz_t size_needed; siz_t size_needed;
// Check parameters. // Check parameters.
@@ -70,8 +70,8 @@ siz_t bli_packm_init
does_invert_diag = bli_cntl_packm_params_does_invert_diag( cntl ); does_invert_diag = bli_cntl_packm_params_does_invert_diag( cntl );
rev_iter_if_upper = bli_cntl_packm_params_rev_iter_if_upper( cntl ); rev_iter_if_upper = bli_cntl_packm_params_rev_iter_if_upper( cntl );
rev_iter_if_lower = bli_cntl_packm_params_rev_iter_if_lower( cntl ); rev_iter_if_lower = bli_cntl_packm_params_rev_iter_if_lower( cntl );
//pack_schema = bli_cntl_packm_params_pack_schema( cntl ); schema = bli_cntl_packm_params_pack_schema( cntl );
pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl ); //pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
#if 0 #if 0
// Let us now check to see if the object has already been packed. First // Let us now check to see if the object has already been packed. First
@@ -112,30 +112,51 @@ siz_t bli_packm_init
return 0; return 0;
} }
// We now ignore the pack_schema field in the control tree and #if 0
// extract the schema from the context, depending on whether we are
// preparing to pack a block of A or panel of B. For A and B, we must
// obtain the schema from the context since the induced methods reuse
// the same control trees used by native execution, and those induced
// methods specify the schema used by the current execution phase
// within the context (whereas the control tree does not change).
pack_t schema; pack_t schema;
if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK ) if ( bli_cntx_method( cntx ) != BLIS_NAT )
{ {
schema = bli_cntx_schema_a_block( cntx ); // We now ignore the pack_schema field in the control tree and
// extract the schema from the context, depending on whether we are
// preparing to pack a block of A or panel of B. For A and B, we must
// obtain the schema from the context since the induced methods reuse
// the same control trees used by native execution, and those induced
// methods specify the schema used by the current execution phase
// within the context (whereas the control tree does not change).
if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK )
{
schema = bli_cntx_schema_a_block( cntx );
}
else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL )
{
schema = bli_cntx_schema_b_panel( cntx );
}
else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
{
schema = bli_cntl_packm_params_pack_schema( cntl );
}
} }
else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL ) else // ( bli_cntx_method( cntx ) == BLIS_NAT )
{ {
schema = bli_cntx_schema_b_panel( cntx ); // For native execution, we obtain the schema from the control tree
// node. (Notice that it doesn't matter if the pack_buf_type is for
// A or B.)
schema = bli_cntl_packm_params_pack_schema( cntl );
} }
else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL ) // This is no longer needed now that we branch between native and
// non-native cases above.
#if 0
if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
{ {
// If we get a request to pack C for some reason, it is likely // If we get a request to pack C for some reason, it is likely
// not part of an induced method, and so it would be safe (and // not part of an induced method, and so it would be safe (and
// necessary) to read the pack schema from the control tree. // necessary) to read the pack schema from the control tree.
schema = bli_cntl_packm_params_pack_schema( cntl ); schema = bli_cntl_packm_params_pack_schema( cntl );
} }
#endif
#endif
// Prepare a few other variables based on properties of the control // Prepare a few other variables based on properties of the control
// tree. // tree.

View File

@@ -45,6 +45,21 @@ void bli_l3_cntl_create_if
cntl_t** cntl_use cntl_t** cntl_use
) )
{ {
// This is part of a hack to support mixed domain in bli_gemm_front().
// Sometimes we need to specify a non-standard schema for A and B, and
// we decided to transmit them via the schema field in the obj_t's
// rather than pass them in as function parameters. Once the values
// have been read, we immediately reset them back to their expected
// values for unpacked objects. Notice that we do this even if the
// caller passed in a custom control tree; that's because we still need
// to reset the pack schema of a and b, which were modified by the
// operation's _front() function.
pack_t schema_a = bli_obj_pack_schema( a );
pack_t schema_b = bli_obj_pack_schema( b );
bli_obj_set_pack_schema( BLIS_NOT_PACKED, a );
bli_obj_set_pack_schema( BLIS_NOT_PACKED, b );
// If the control tree pointer is NULL, we construct a default // If the control tree pointer is NULL, we construct a default
// tree as a function of the operation family. // tree as a function of the operation family.
if ( cntl_orig == NULL ) if ( cntl_orig == NULL )
@@ -53,7 +68,7 @@ void bli_l3_cntl_create_if
family == BLIS_HERK || family == BLIS_HERK ||
family == BLIS_TRMM ) family == BLIS_TRMM )
{ {
*cntl_use = bli_gemm_cntl_create( family ); *cntl_use = bli_gemm_cntl_create( family, schema_a, schema_b );
} }
else // if ( family == BLIS_TRSM ) else // if ( family == BLIS_TRSM )
{ {
@@ -62,7 +77,7 @@ void bli_l3_cntl_create_if
if ( bli_obj_is_triangular( a ) ) side = BLIS_LEFT; if ( bli_obj_is_triangular( a ) ) side = BLIS_LEFT;
else side = BLIS_RIGHT; else side = BLIS_RIGHT;
*cntl_use = bli_trsm_cntl_create( side ); *cntl_use = bli_trsm_cntl_create( side, schema_a, schema_b );
} }
} }
else else

View File

@@ -57,20 +57,25 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
/* Invoke the operation's "ind" function--its induced method front-end. /* Only proceed with an induced method if all operands have the same
This function will call native execution for real domain problems. (complex) datatype. If any datatypes differ, skip the induced method
For complex problems, it calls the highest priority induced method chooser function and proceed directly with native execution, which is
that is available (ie: implemented and enabled), and if none are where mixed datatype support will be implemented (if at all). */ \
enabled, it calls native execution. */ \ if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \
PASTEMAC(opname,ind) \ bli_obj_dt( b ) == bli_obj_dt( c ) && \
( \ bli_obj_is_complex( c ) ) \
alpha, \ { \
a, \ /* Invoke the operation's "ind" function--its induced method front-end.
b, \ For complex problems, it calls the highest priority induced method
beta, \ that is available (ie: implemented and enabled), and if none are
c, \ enabled, it calls native execution. (For real problems, it calls
cntx \ the operation's native execution interface.) */ \
); \ PASTEMAC(opname,ind)( alpha, a, b, beta, c, cntx ); \
} \
else \
{ \
PASTEMAC(opname,nat)( alpha, a, b, beta, c, cntx ); \
} \
} }
GENFRONT( gemm ) GENFRONT( gemm )
@@ -96,16 +101,25 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
PASTEMAC(opname,ind) \ /* Only proceed with an induced method if all operands have the same
( \ (complex) datatype. If any datatypes differ, skip the induced method
side, \ chooser function and proceed directly with native execution, which is
alpha, \ where mixed datatype support will be implemented (if at all). */ \
a, \ if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \
b, \ bli_obj_dt( b ) == bli_obj_dt( c ) && \
beta, \ bli_obj_is_complex( c ) ) \
c, \ { \
cntx \ /* Invoke the operation's "ind" function--its induced method front-end.
); \ For complex problems, it calls the highest priority induced method
that is available (ie: implemented and enabled), and if none are
enabled, it calls native execution. (For real problems, it calls
the operation's native execution interface.) */ \
PASTEMAC(opname,ind)( side, alpha, a, b, beta, c, cntx ); \
} \
else \
{ \
PASTEMAC(opname,nat)( side, alpha, a, b, beta, c, cntx ); \
} \
} }
GENFRONT( hemm ) GENFRONT( hemm )
@@ -129,14 +143,24 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
PASTEMAC(opname,ind) \ /* Only proceed with an induced method if all operands have the same
( \ (complex) datatype. If any datatypes differ, skip the induced method
alpha, \ chooser function and proceed directly with native execution, which is
a, \ where mixed datatype support will be implemented (if at all). */ \
beta, \ if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \
c, \ bli_obj_is_complex( c ) ) \
cntx \ { \
); \ /* Invoke the operation's "ind" function--its induced method front-end.
For complex problems, it calls the highest priority induced method
that is available (ie: implemented and enabled), and if none are
enabled, it calls native execution. (For real problems, it calls
the operation's native execution interface.) */ \
PASTEMAC(opname,ind)( alpha, a, beta, c, cntx ); \
} \
else \
{ \
PASTEMAC(opname,nat)( alpha, a, beta, c, cntx ); \
} \
} }
GENFRONT( herk ) GENFRONT( herk )
@@ -159,14 +183,24 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
PASTEMAC(opname,ind) \ /* Only proceed with an induced method if all operands have the same
( \ (complex) datatype. If any datatypes differ, skip the induced method
side, \ chooser function and proceed directly with native execution, which is
alpha, \ where mixed datatype support will be implemented (if at all). */ \
a, \ if ( bli_obj_dt( a ) == bli_obj_dt( b ) && \
b, \ bli_obj_is_complex( b ) ) \
cntx \ { \
); \ /* Invoke the operation's "ind" function--its induced method front-end.
For complex problems, it calls the highest priority induced method
that is available (ie: implemented and enabled), and if none are
enabled, it calls native execution. (For real problems, it calls
the operation's native execution interface.) */ \
PASTEMAC(opname,ind)( side, alpha, a, b, cntx ); \
} \
else \
{ \
PASTEMAC(opname,nat)( side, alpha, a, b, cntx ); \
} \
} }
GENFRONT( trmm ) GENFRONT( trmm )

View File

@@ -36,17 +36,21 @@
cntl_t* bli_gemm_cntl_create cntl_t* bli_gemm_cntl_create
( (
opid_t family opid_t family,
pack_t schema_a,
pack_t schema_b
) )
{ {
return bli_gemmbp_cntl_create( family ); return bli_gemmbp_cntl_create( family, schema_a, schema_b );
} }
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
cntl_t* bli_gemmbp_cntl_create cntl_t* bli_gemmbp_cntl_create
( (
opid_t family opid_t family,
pack_t schema_a,
pack_t schema_b
) )
{ {
void* macro_kernel_p = bli_gemm_ker_var2; void* macro_kernel_p = bli_gemm_ker_var2;
@@ -82,7 +86,7 @@ cntl_t* bli_gemmbp_cntl_create
FALSE, // do NOT invert diagonal FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper? FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower? FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS, schema_a, // normally BLIS_PACKED_ROW_PANELS
BLIS_BUFFER_FOR_A_BLOCK, BLIS_BUFFER_FOR_A_BLOCK,
gemm_cntl_bp_bu gemm_cntl_bp_bu
); );
@@ -106,7 +110,7 @@ cntl_t* bli_gemmbp_cntl_create
FALSE, // do NOT invert diagonal FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper? FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower? FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS, schema_b, // normally BLIS_PACKED_COL_PANELS
BLIS_BUFFER_FOR_B_PANEL, BLIS_BUFFER_FOR_B_PANEL,
gemm_cntl_op_bp gemm_cntl_op_bp
); );
@@ -134,6 +138,10 @@ cntl_t* bli_gemmbp_cntl_create
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// This control tree creation function is disabled because it is no longer used.
// (It was originally created in the run up to publishing the 1m journal article,
// but was disabled to reduce complexity.)
#if 0
cntl_t* bli_gemmpb_cntl_create cntl_t* bli_gemmpb_cntl_create
( (
opid_t family opid_t family
@@ -223,6 +231,7 @@ cntl_t* bli_gemmpb_cntl_create
return gemm_cntl_vl_mm; return gemm_cntl_vl_mm;
} }
#endif
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------

View File

@@ -34,20 +34,26 @@
cntl_t* bli_gemm_cntl_create cntl_t* bli_gemm_cntl_create
( (
opid_t family opid_t family,
pack_t schema_a,
pack_t schema_b
); );
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
cntl_t* bli_gemmbp_cntl_create cntl_t* bli_gemmbp_cntl_create
( (
opid_t family opid_t family,
pack_t schema_a,
pack_t schema_b
); );
#if 0
cntl_t* bli_gemmpb_cntl_create cntl_t* bli_gemmpb_cntl_create
( (
opid_t family opid_t family,
); );
#endif
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------

View File

@@ -87,10 +87,34 @@ void bli_gemm_front
} }
// Record the threading for each level within the context. // Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_GEMM, BLIS_LEFT, cntx, bli_cntx_set_thrloop_from_env
bli_obj_length( &c_local ), (
bli_obj_width( &c_local ), BLIS_GEMM,
bli_obj_width( &a_local ) ); BLIS_LEFT, // ignored for gemm/hemm/symm
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end via the thread handler. // Invoke the internal back-end via the thread handler.
bli_l3_thread_decorator bli_l3_thread_decorator

View File

@@ -88,10 +88,34 @@ void bli_hemm_front
} }
// Record the threading for each level within the context. // Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_HEMM, BLIS_LEFT, cntx, bli_cntx_set_thrloop_from_env
bli_obj_length( &c_local ), (
bli_obj_width( &c_local ), BLIS_HEMM,
bli_obj_width( &a_local ) ); BLIS_LEFT, // ignored for gemm/hemm/symm
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end. // Invoke the internal back-end.
bli_l3_thread_decorator bli_l3_thread_decorator

View File

@@ -106,10 +106,38 @@ void bli_her2k_front
} }
// Record the threading for each level within the context. // Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_HER2K, BLIS_LEFT, cntx, bli_cntx_set_thrloop_from_env
bli_obj_length( &c_local ), (
bli_obj_width( &c_local ), BLIS_HER2K,
bli_obj_width( &a_local ) ); BLIS_LEFT, // ignored for her[2]k/syr[2]k
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &bh_local );
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &b_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &ah_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &bh_local );
bli_obj_set_pack_schema( schema_a, &b_local );
bli_obj_set_pack_schema( schema_b, &ah_local );
}
// Invoke herk twice, using beta only the first time. // Invoke herk twice, using beta only the first time.

View File

@@ -86,10 +86,34 @@ void bli_herk_front
} }
// Record the threading for each level within the context. // Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_HERK, BLIS_LEFT, cntx, bli_cntx_set_thrloop_from_env
bli_obj_length( &c_local ), (
bli_obj_width( &c_local ), BLIS_HERK,
bli_obj_width( &a_local ) ); BLIS_LEFT, // ignored for her[2]k/syr[2]k
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &ah_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &ah_local );
}
// Invoke the internal back-end. // Invoke the internal back-end.
bli_l3_thread_decorator bli_l3_thread_decorator

View File

@@ -87,10 +87,34 @@ void bli_symm_front
} }
// Record the threading for each level within the context. // Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_SYMM, BLIS_LEFT, cntx, bli_cntx_set_thrloop_from_env
bli_obj_length( &c_local ), (
bli_obj_width( &c_local ), BLIS_SYMM,
bli_obj_width( &a_local ) ); BLIS_LEFT, // ignored for gemm/hemm/symm
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end. // Invoke the internal back-end.
bli_l3_thread_decorator bli_l3_thread_decorator

View File

@@ -87,10 +87,38 @@ void bli_syr2k_front
} }
// Record the threading for each level within the context. // Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_SYR2K, BLIS_LEFT, cntx, bli_cntx_set_thrloop_from_env
bli_obj_length( &c_local ), (
bli_obj_width( &c_local ), BLIS_SYR2K,
bli_obj_width( &a_local ) ); BLIS_LEFT, // ignored for her[2]k/syr[2]k
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &bt_local );
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &b_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &at_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &bt_local );
bli_obj_set_pack_schema( schema_a, &b_local );
bli_obj_set_pack_schema( schema_b, &at_local );
}
// Invoke herk twice, using beta only the first time. // Invoke herk twice, using beta only the first time.

View File

@@ -80,10 +80,34 @@ void bli_syrk_front
} }
// Record the threading for each level within the context. // Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_SYRK, BLIS_LEFT, cntx, bli_cntx_set_thrloop_from_env
bli_obj_length( &c_local ), (
bli_obj_width( &c_local ), BLIS_SYRK,
bli_obj_width( &a_local ) ); BLIS_LEFT, // ignored for her[2]k/syr[2]k
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &at_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &at_local );
}
// Invoke the internal back-end. // Invoke the internal back-end.
bli_l3_thread_decorator bli_l3_thread_decorator

View File

@@ -130,10 +130,34 @@ void bli_trmm_front
bli_obj_set_as_root( &c_local ); bli_obj_set_as_root( &c_local );
// Record the threading for each level within the context. // Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_TRMM, side, cntx, bli_cntx_set_thrloop_from_env
bli_obj_length( &c_local ), (
bli_obj_width( &c_local ), BLIS_TRMM,
bli_obj_width( &a_local ) ); side,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end. // Invoke the internal back-end.
bli_l3_thread_decorator bli_l3_thread_decorator

View File

@@ -129,10 +129,34 @@ void bli_trmm3_front
bli_obj_set_as_root( &c_local ); bli_obj_set_as_root( &c_local );
// Record the threading for each level within the context. // Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_TRMM3, side, cntx, bli_cntx_set_thrloop_from_env
bli_obj_length( &c_local ), (
bli_obj_width( &c_local ), BLIS_TRMM3,
bli_obj_width( &a_local ) ); side,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end. // Invoke the internal back-end.
bli_l3_thread_decorator bli_l3_thread_decorator

View File

@@ -36,16 +36,21 @@
cntl_t* bli_trsm_cntl_create cntl_t* bli_trsm_cntl_create
( (
side_t side side_t side,
pack_t schema_a,
pack_t schema_b
) )
{ {
if ( bli_is_left( side ) ) return bli_trsm_l_cntl_create(); if ( bli_is_left( side ) )
else return bli_trsm_r_cntl_create(); return bli_trsm_l_cntl_create( schema_a, schema_b );
else
return bli_trsm_r_cntl_create( schema_a, schema_b );
} }
cntl_t* bli_trsm_l_cntl_create cntl_t* bli_trsm_l_cntl_create
( (
void pack_t schema_a,
pack_t schema_b
) )
{ {
void* macro_kernel_p = bli_trsm_xx_ker_var2; void* macro_kernel_p = bli_trsm_xx_ker_var2;
@@ -79,7 +84,7 @@ cntl_t* bli_trsm_l_cntl_create
TRUE, // invert diagonal TRUE, // invert diagonal
TRUE, // reverse iteration if upper? TRUE, // reverse iteration if upper?
FALSE, // reverse iteration if lower? FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS, schema_a, // normally BLIS_PACKED_ROW_PANELS
BLIS_BUFFER_FOR_A_BLOCK, BLIS_BUFFER_FOR_A_BLOCK,
trsm_cntl_bp_bu trsm_cntl_bp_bu
); );
@@ -103,7 +108,7 @@ cntl_t* bli_trsm_l_cntl_create
FALSE, // do NOT invert diagonal FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper? FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower? FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS, schema_b, // normally BLIS_PACKED_COL_PANELS
BLIS_BUFFER_FOR_B_PANEL, BLIS_BUFFER_FOR_B_PANEL,
trsm_cntl_op_bp trsm_cntl_op_bp
); );
@@ -131,7 +136,8 @@ cntl_t* bli_trsm_l_cntl_create
cntl_t* bli_trsm_r_cntl_create cntl_t* bli_trsm_r_cntl_create
( (
void pack_t schema_a,
pack_t schema_b
) )
{ {
void* macro_kernel_p = bli_trsm_xx_ker_var2; void* macro_kernel_p = bli_trsm_xx_ker_var2;
@@ -165,7 +171,7 @@ cntl_t* bli_trsm_r_cntl_create
FALSE, // do NOT invert diagonal FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper? FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower? FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS, schema_a, // normally BLIS_PACKED_ROW_PANELS
BLIS_BUFFER_FOR_A_BLOCK, BLIS_BUFFER_FOR_A_BLOCK,
trsm_cntl_bp_bu trsm_cntl_bp_bu
); );
@@ -189,7 +195,7 @@ cntl_t* bli_trsm_r_cntl_create
TRUE, // invert diagonal TRUE, // invert diagonal
FALSE, // reverse iteration if upper? FALSE, // reverse iteration if upper?
TRUE, // reverse iteration if lower? TRUE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS, schema_b, // normally BLIS_PACKED_COL_PANELS
BLIS_BUFFER_FOR_B_PANEL, BLIS_BUFFER_FOR_B_PANEL,
trsm_cntl_op_bp trsm_cntl_op_bp
); );

View File

@@ -34,17 +34,21 @@
cntl_t* bli_trsm_cntl_create cntl_t* bli_trsm_cntl_create
( (
side_t side side_t side,
pack_t schema_a,
pack_t schema_b
); );
cntl_t* bli_trsm_l_cntl_create cntl_t* bli_trsm_l_cntl_create
( (
void pack_t schema_a,
pack_t schema_b
); );
cntl_t* bli_trsm_r_cntl_create cntl_t* bli_trsm_r_cntl_create
( (
void pack_t schema_a,
pack_t schema_b
); );
void bli_trsm_cntl_free void bli_trsm_cntl_free

View File

@@ -121,10 +121,34 @@ void bli_trsm_front
bli_obj_set_as_root( &c_local ); bli_obj_set_as_root( &c_local );
// Record the threading for each level within the context. // Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_TRSM, side, cntx, bli_cntx_set_thrloop_from_env
bli_obj_length( &c_local ), (
bli_obj_width( &c_local ), BLIS_TRSM,
bli_obj_width( &a_local ) ); side,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_trsm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end. // Invoke the internal back-end.
bli_l3_thread_decorator bli_l3_thread_decorator

View File

@@ -869,10 +869,10 @@ void bli_cntx_set_thrloop_from_env
( (
opid_t l3_op, opid_t l3_op,
side_t side, side_t side,
cntx_t* cntx,
dim_t m, dim_t m,
dim_t n, dim_t n,
dim_t k dim_t k,
cntx_t* cntx
) )
{ {
dim_t jc, pc, ic, jr, ir; dim_t jc, pc, ic, jr, ir;
@@ -988,7 +988,7 @@ void bli_cntx_set_thrloop_from_env
); );
} }
} }
else // if ( l3_op == BLIS_GEMM || l3_op == BLIS_HERK ) else // any other level-3 operation besides trmm/trsm
{ {
bli_cntx_set_thrloop bli_cntx_set_thrloop
( (

View File

@@ -674,12 +674,15 @@ void bli_cntx_set_l1f_kers( dim_t n_kers, ... );
void bli_cntx_set_l1v_kers( dim_t n_kers, ... ); void bli_cntx_set_l1v_kers( dim_t n_kers, ... );
void bli_cntx_set_packm_kers( dim_t n_kers, ... ); void bli_cntx_set_packm_kers( dim_t n_kers, ... );
void bli_cntx_set_thrloop_from_env( opid_t l3_op, void bli_cntx_set_thrloop_from_env
side_t side, (
cntx_t* cntx, opid_t l3_op,
dim_t m, side_t side,
dim_t n, dim_t m,
dim_t k ); dim_t n,
dim_t k,
cntx_t* cntx
);
void bli_cntx_print( cntx_t* cntx ); void bli_cntx_print( cntx_t* cntx );

View File

@@ -231,11 +231,18 @@ void bli_l3_thread_decorator
{ {
dim_t id = omp_get_thread_num(); dim_t id = omp_get_thread_num();
obj_t a_t, b_t, c_t;
cntl_t* cntl_use; cntl_t* cntl_use;
thrinfo_t* thread; thrinfo_t* thread;
// Alias thread-local copies of A, B, and C. These will be the objects
// we pass into the thread functions.
bli_obj_alias_to( a, &a_t );
bli_obj_alias_to( b, &b_t );
bli_obj_alias_to( c, &c_t );
// Create a default control tree for the operation, if needed. // Create a default control tree for the operation, if needed.
bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use ); bli_l3_cntl_create_if( family, &a_t, &b_t, &c_t, cntl, &cntl_use );
// Create the root node of the current thread's thrinfo_t structure. // Create the root node of the current thread's thrinfo_t structure.
bli_l3_thrinfo_create_root( id, gl_comm, cntx, cntl_use, &thread ); bli_l3_thrinfo_create_root( id, gl_comm, cntx, cntl_use, &thread );
@@ -243,17 +250,17 @@ void bli_l3_thread_decorator
func func
( (
alpha, alpha,
a, &a_t,
b, &b_t,
beta, beta,
c, &c_t,
cntx, cntx,
cntl_use, cntl_use,
thread thread
); );
// Free the control tree, if one was created locally. // Free the control tree, if one was created locally.
bli_l3_cntl_free_if( a, b, c, cntl, cntl_use, thread ); bli_l3_cntl_free_if( &a_t, &b_t, &c_t, cntl, cntl_use, thread );
#ifdef PRINT_THRINFO #ifdef PRINT_THRINFO
threads[id] = thread; threads[id] = thread;

View File

@@ -161,11 +161,18 @@ void* bli_l3_thread_entry( void* data_void )
dim_t id = data->id; dim_t id = data->id;
thrcomm_t* gl_comm = data->gl_comm; thrcomm_t* gl_comm = data->gl_comm;
obj_t a_t, b_t, c_t;
cntl_t* cntl_use; cntl_t* cntl_use;
thrinfo_t* thread; thrinfo_t* thread;
// Alias thread-local copies of A, B, and C. These will be the objects
// we pass into the thread function.
bli_obj_alias_to( a, &a_t );
bli_obj_alias_to( b, &b_t );
bli_obj_alias_to( c, &c_t );
// Create a default control tree for the operation, if needed. // Create a default control tree for the operation, if needed.
bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use ); bli_l3_cntl_create_if( family, &a_t, &b_t, &c_t, cntl, &cntl_use );
// Create the root node of the current thread's thrinfo_t structure. // Create the root node of the current thread's thrinfo_t structure.
bli_l3_thrinfo_create_root( id, gl_comm, cntx, cntl_use, &thread ); bli_l3_thrinfo_create_root( id, gl_comm, cntx, cntl_use, &thread );
@@ -173,17 +180,17 @@ void* bli_l3_thread_entry( void* data_void )
func func
( (
alpha, alpha,
a, &a_t,
b, &b_t,
beta, beta,
c, &c_t,
cntx, cntx,
cntl_use, cntl_use,
thread thread
); );
// Free the control tree, if one was created locally. // Free the control tree, if one was created locally.
bli_l3_cntl_free_if( a, b, c, cntl, cntl_use, thread ); bli_l3_cntl_free_if( &a_t, &b_t, &c_t, cntl, cntl_use, thread );
// Free the current thread's thrinfo_t structure. // Free the current thread's thrinfo_t structure.
bli_l3_thrinfo_free( thread ); bli_l3_thrinfo_free( thread );

View File

@@ -94,6 +94,12 @@ void bli_l3_thread_decorator
cntl_t* cntl_use; cntl_t* cntl_use;
thrinfo_t* thread; thrinfo_t* thread;
// NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't
// need to alias objects for A, B, and C since they were already aliased
// in bli_*_front(). (We only needed thread-local copies so that each
// thread could safely reset its internal (beta) scalar on c after the
// first iteration of the pc (kc) loop.)
// Create a default control tree for the operation, if needed. // Create a default control tree for the operation, if needed.
bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use ); bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use );