Changed usage of virtual microkernel slots in cntx.

Details:
- Changed the way virtual microkernels are handled in the context.
  Previously, there were query routines such as bli_cntx_get_l3_ukr_dt()
  which returned the native ukernel for a datatype if the method was
  equal to BLIS_NAT, or the virtual ukernel for that datatype if the
  method was some other value. Going forward, the context's native and
  virtual ukernel slots will both be initialized to native ukernel
  function pointers for native execution, and for non-native execution
  the virtual ukernel slot will instead hold the address of the
  appropriate virtual ukernel. This allows us
  to always query the virtual ukernel slot (from within, say, the
  macrokernel) without needing any logic in the query routine to decide
  which function pointer (native or virtual) to return. (Essentially,
  the logic has been shifted to init-time instead of compute-time.)
  This scheme will also allow generalized virtual ukernels as a way
  to insert extra logic in between the macrokernel and the native
  microkernel.
- Initialize native contexts (in bli_cntx_ref.c) with native ukernel
  function addresses stored to the virtual ukernel slots pursuant to
  the above policy change.
- Renamed all static functions that were native/virtual-ambiguous, such
  as bli_cntx_get_l3_ukr_dt() or bli_cntx_l3_ukr_prefers_cols_dt()
  pursuant to the above policy change. Those routines now use the
  substring "get_l3_vir_ukr" in their name instead of "get_l3_ukr". All
  of these functions were static functions defined in bli_cntx.h, and
  most uses were in level-3 front-ends and macrokernels.
- Deprecated anti_pref bool_t in context, along with related functions
  such as bli_cntx_l3_ukr_eff_dislikes_storage_of(), now that 1m's
  panel-block execution is disabled.
This commit is contained in:
Field G. Van Zee
2018-06-12 19:38:37 -05:00
parent dbaf440540
commit 87db5c048e
33 changed files with 86 additions and 142 deletions

View File

@@ -55,7 +55,7 @@ void PASTEMAC(ch,opname) \
\
/* Query the context for the function address of the current
datatype's micro-kernel. */ \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \
\
/* Invoke the typed function for the given datatype. */ \
f( \
@@ -91,7 +91,7 @@ void PASTEMAC(ch,opname) \
\
/* Query the context for the function address of the current
datatype's micro-kernel. */ \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \
\
/* Invoke the typed function for the given datatype. */ \
f( \
@@ -129,7 +129,7 @@ void PASTEMAC(ch,opname) \
\
/* Query the context for the function address of the current
datatype's micro-kernel. */ \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \
\
/* Invoke the typed function for the given datatype. */ \
f( \

View File

@@ -77,7 +77,7 @@ void bli_gemm_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_eff_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_obj_swap( &a_local, &b_local );

View File

@@ -183,7 +183,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -192,7 +192,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -163,13 +163,13 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. */ \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -163,13 +163,13 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. */ \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -72,7 +72,7 @@ void bli_hemm_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_toggle_side( &side );
bli_obj_toggle_conj( &a_local );

View File

@@ -92,7 +92,7 @@ void bli_her2k_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_obj_swap( &a_local, &bh_local );
bli_obj_swap( &b_local, &ah_local );

View File

@@ -77,7 +77,7 @@ void bli_herk_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_obj_toggle_conj( &a_local );
bli_obj_toggle_conj( &ah_local );

View File

@@ -168,7 +168,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -177,7 +177,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -168,7 +168,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -177,7 +177,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -72,7 +72,7 @@ void bli_symm_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_toggle_side( &side );
bli_obj_induce_trans( &b_local );

View File

@@ -81,7 +81,7 @@ void bli_syr2k_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_obj_induce_trans( &c_local );
}

View File

@@ -74,7 +74,7 @@ void bli_syrk_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_obj_induce_trans( &c_local );
}

View File

@@ -105,7 +105,7 @@ void bli_trmm_front
// NOTE: We disable the optimization for 1x1 matrices since the concept
// of row- vs. column storage breaks down.
if ( !bli_obj_is_1x1( &c_local ) )
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_toggle_side( &side );
bli_obj_induce_trans( &a_local );

View File

@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -104,7 +104,7 @@ void bli_trmm3_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_toggle_side( &side );
bli_obj_induce_trans( &a_local );

View File

@@ -162,9 +162,9 @@ void PASTEMAC(ch,varname) \
\
/* Cast the micro-kernel address to its function pointer type. */ \
PASTECH(ch,gemmtrsm_ukr_ft) \
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -173,7 +173,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -162,9 +162,9 @@ void PASTEMAC(ch,varname) \
\
/* Cast the micro-kernel address to its function pointer type. */ \
PASTECH(ch,gemmtrsm_ukr_ft) \
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -173,7 +173,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -167,9 +167,9 @@ void PASTEMAC(ch,varname) \
is transposed so that all kernel instances are of the "left"
variety (since those are the only trsm ukernels that exist). */ \
PASTECH(ch,gemmtrsm_ukr_ft) \
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -178,7 +178,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -167,9 +167,9 @@ void PASTEMAC(ch,varname) \
is transposed so that all kernel instances are of the "left"
variety (since those are the only trsm ukernels that exist). */ \
PASTECH(ch,gemmtrsm_ukr_ft) \
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -178,7 +178,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\

View File

@@ -544,8 +544,10 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... )
// -- End variable argument section --
// Query the context for the addresses of:
// - the l3 virtual ukernel func_t array
// - the l3 native ukernel func_t array
// - the l3 native ukernel preferences array
func_t* cntx_l3_vir_ukrs = bli_cntx_l3_vir_ukrs_buf( cntx );
func_t* cntx_l3_nat_ukrs = bli_cntx_l3_nat_ukrs_buf( cntx );
mbool_t* cntx_l3_nat_ukrs_prefs = bli_cntx_l3_nat_ukrs_prefs_buf( cntx );
@@ -565,11 +567,18 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... )
// Index into the func_t and mbool_t for the current kernel id
// being processed.
func_t* vukrs = &cntx_l3_vir_ukrs[ ukr_id ];
func_t* ukrs = &cntx_l3_nat_ukrs[ ukr_id ];
mbool_t* prefs = &cntx_l3_nat_ukrs_prefs[ ukr_id ];
// Store the ukernel function pointer and preference values into
// the context.
// the context. Notice that we redundantly store the native
// ukernel address in both the native and virtual ukernel slots
// in the context. This is standard practice when creating a
// native context. (Induced method contexts will overwrite the
// virtual function pointer with the address of the appropriate
// virtual ukernel.)
bli_func_set_dt( ukr_fp, ukr_dt, vukrs );
bli_func_set_dt( ukr_fp, ukr_dt, ukrs );
bli_mbool_set_dt( ukr_pref, ukr_dt, prefs );
}

View File

@@ -60,8 +60,6 @@ typedef struct cntx_s
pack_t schema_b;
pack_t schema_c;
bool_t anti_pref;
dim_t* thrloop;
membrk_t* membrk;
@@ -126,10 +124,6 @@ static pack_t bli_cntx_schema_c_panel( cntx_t* cntx )
{
return cntx->schema_c_panel;
}
static bool_t bli_cntx_anti_pref( cntx_t* cntx )
{
return cntx->anti_pref;
}
static dim_t* bli_cntx_thrloop( cntx_t* cntx )
{
return cntx->thrloop;
@@ -166,10 +160,6 @@ static void bli_cntx_set_schema_ab_blockpanel( pack_t sa, pack_t sb, cntx_t* cnt
bli_cntx_set_schema_a_block( sa, cntx );
bli_cntx_set_schema_b_panel( sb, cntx );
}
static void bli_cntx_set_anti_pref( bool_t anti_pref, cntx_t* cntx )
{
cntx->anti_pref = anti_pref;
}
static void bli_cntx_set_membrk( membrk_t* membrk, cntx_t* cntx )
{
cntx->membrk = membrk;
@@ -234,27 +224,6 @@ static dim_t bli_cntx_get_bmult_dt( num_t dt, bszid_t bs_id, cntx_t* cntx )
// -----------------------------------------------------------------------------
static func_t* bli_cntx_get_l3_ukrs( l3ukr_t ukr_id, cntx_t* cntx )
{
func_t* funcs;
if ( bli_cntx_method( (cntx) ) != BLIS_NAT )
funcs = bli_cntx_l3_vir_ukrs_buf( cntx );
else
funcs = bli_cntx_l3_nat_ukrs_buf( cntx );
func_t* func = &funcs[ ukr_id ];
return func;
}
static void* bli_cntx_get_l3_ukr_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
{
func_t* func = bli_cntx_get_l3_ukrs( ukr_id, cntx );
return bli_func_get_dt( dt, func );
}
static func_t* bli_cntx_get_l3_vir_ukrs( l3ukr_t ukr_id, cntx_t* cntx )
{
func_t* funcs = bli_cntx_l3_vir_ukrs_buf( cntx );
@@ -487,55 +456,43 @@ static bool_t bli_cntx_l3_nat_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_i
return !bli_cntx_l3_nat_ukr_prefers_storage_of( obj, ukr_id, cntx );
}
static bool_t bli_cntx_l3_nat_ukr_eff_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
bool_t r_val = bli_cntx_l3_nat_ukr_prefers_storage_of( obj, ukr_id, cntx );
// If the anti-preference is set, negate the result.
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
return r_val;
}
static bool_t bli_cntx_l3_nat_ukr_eff_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
bool_t r_val = bli_cntx_l3_nat_ukr_dislikes_storage_of( obj, ukr_id, cntx );
// If the anti-preference is set, negate the result.
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
return r_val;
}
// -----------------------------------------------------------------------------
static bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
static bool_t bli_cntx_l3_vir_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
{
// For induced methods, return the ukernel storage preferences of the
// corresponding real micro-kernel.
// NOTE: This projection to real domain becomes unnecessary if you
// set the exec_dt for 1m to the real projection of the storage
// datatype.
if ( bli_cntx_method( cntx ) != BLIS_NAT )
dt = bli_dt_proj_to_real( dt );
return bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx );
}
static bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
static bool_t bli_cntx_l3_vir_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
{
// For induced methods, return the ukernel storage preferences of the
// corresponding real micro-kernel.
// NOTE: This projection to real domain becomes unnecessary if you
// set the exec_dt for 1m to the real projection of the storage
// datatype.
if ( bli_cntx_method( cntx ) != BLIS_NAT )
dt = bli_dt_proj_to_real( dt );
return bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx );
}
static bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
static bool_t bli_cntx_l3_vir_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
const num_t dt = bli_obj_dt( obj );
// Note that we use the execution datatype, which may differ from the
// storage datatype of C (though this would happen in very few situations).
const num_t dt = bli_obj_exec_dt( obj );
const bool_t ukr_prefers_rows
= bli_cntx_l3_ukr_prefers_rows_dt( dt, ukr_id, cntx );
= bli_cntx_l3_vir_ukr_prefers_rows_dt( dt, ukr_id, cntx );
const bool_t ukr_prefers_cols
= bli_cntx_l3_ukr_prefers_cols_dt( dt, ukr_id, cntx );
= bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, ukr_id, cntx );
bool_t r_val = FALSE;
if ( bli_obj_is_row_stored( obj ) && ukr_prefers_rows ) r_val = TRUE;
@@ -544,29 +501,9 @@ static bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cn
return r_val;
}
static bool_t bli_cntx_l3_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
static bool_t bli_cntx_l3_vir_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
return !bli_cntx_l3_ukr_prefers_storage_of( obj, ukr_id, cntx );
}
static bool_t bli_cntx_l3_ukr_eff_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
bool_t r_val = bli_cntx_l3_ukr_prefers_storage_of( obj, ukr_id, cntx );
// If the anti-preference is set, negate the result.
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
return r_val;
}
static bool_t bli_cntx_l3_ukr_eff_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
bool_t r_val = bli_cntx_l3_ukr_dislikes_storage_of( obj, ukr_id, cntx );
// If the anti-preference is set, negate the result.
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
return r_val;
return !bli_cntx_l3_vir_ukr_prefers_storage_of( obj, ukr_id, cntx );
}
// -----------------------------------------------------------------------------

View File

@@ -584,7 +584,7 @@ char* bli_gks_l3_ukr_impl_string( l3ukr_t ukr, ind_t method, num_t dt )
// then query the ukernel function pointer for the given datatype from
// that context.
cntx_t* cntx = bli_gks_query_ind_cntx( method, dt );
void* fp = bli_cntx_get_l3_ukr_dt( dt, ukr, cntx );
void* fp = bli_cntx_get_l3_vir_ukr_dt( dt, ukr, cntx );
// Check whether the ukernel function pointer is NULL for the given
// datatype. If it is NULL, return the string for not applicable.

View File

@@ -1128,8 +1128,6 @@ typedef struct cntx_s
pack_t schema_b_panel;
pack_t schema_c_panel;
bool_t anti_pref;
dim_t thrloop[ BLIS_NUM_LOOPS ];
membrk_t* membrk;

View File

@@ -60,9 +60,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype* minus_one = PASTEMAC(ch,m1); \
\
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
PASTECH(ch,trsm_ukr_ft) \
trsm_ukr = bli_cntx_get_l3_ukr_dt( dt, trsmkerid, cntx ); \
trsm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt, trsmkerid, cntx ); \
\
/* lower: b11 = alpha * b11 - a10 * b01; */ \
/* upper: b11 = alpha * b11 - a12 * b21; */ \

View File

@@ -363,11 +363,11 @@ void GENBARNAME(cntx_init)
funcs = bli_cntx_l3_vir_ukrs_buf( cntx );
gen_func_init_co( &funcs[ BLIS_GEMM_UKR ], gemm1m_ukr_name );
gen_func_init_co( &funcs[ BLIS_GEMMTRSM_L_UKR ], gemmtrsm1m_l_ukr_name );
gen_func_init_co( &funcs[ BLIS_GEMMTRSM_U_UKR ], gemmtrsm1m_u_ukr_name );
gen_func_init_co( &funcs[ BLIS_TRSM_L_UKR ], trsm1m_l_ukr_name );
gen_func_init_co( &funcs[ BLIS_TRSM_U_UKR ], trsm1m_u_ukr_name );
gen_func_init( &funcs[ BLIS_GEMM_UKR ], gemm_ukr_name );
gen_func_init( &funcs[ BLIS_GEMMTRSM_L_UKR ], gemmtrsm_l_ukr_name );
gen_func_init( &funcs[ BLIS_GEMMTRSM_U_UKR ], gemmtrsm_u_ukr_name );
gen_func_init( &funcs[ BLIS_TRSM_L_UKR ], trsm_l_ukr_name );
gen_func_init( &funcs[ BLIS_TRSM_U_UKR ], trsm_u_ukr_name );
// -- Set level-3 native micro-kernels and preferences ---------------------
@@ -467,7 +467,7 @@ void GENBARNAME(cntx_init)
bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS, cntx );
bli_cntx_set_schema_c_panel( BLIS_NOT_PACKED, cntx );
bli_cntx_set_anti_pref( FALSE, cntx );
//bli_cntx_set_anti_pref( FALSE, cntx );
bli_cntx_set_thrloop( 1, 1, 1, 1, 1, cntx );
@@ -726,7 +726,7 @@ void GENBAINAME(cntx_init)
// Initialize the blocksizes according to the micro-kernel preference as
// well as the algorithm.
if ( bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ) )
{
// This branch is used for algorithms 1m_c_bp, 1m_r_pb.
@@ -754,7 +754,7 @@ void GENBAINAME(cntx_init)
cntx
);
}
else // if ( bli_cntx_l3_ukr_prefers_rows_dt( dt, BLIS_GEMM_UKR, cntx ) )
else // if ( bli_cntx_l3_vir_ukr_prefers_rows_dt( dt, BLIS_GEMM_UKR, cntx ) )
{
// This branch is used for algorithms 1m_r_bp, 1m_c_pb.
@@ -811,7 +811,7 @@ void GENBAINAME(cntx_init)
}
else if ( method == BLIS_1M )
{
const bool_t is_pb = FALSE;
//const bool_t is_pb = FALSE;
// Set the anti-preference field to TRUE when executing a panel-block
// algorithm, and FALSE otherwise. This will cause higher-level generic
@@ -819,7 +819,7 @@ void GENBAINAME(cntx_init)
// the micro-kernel output preference so that the two will come back into
// agreement in the panel-block macro-kernel (which implemented in terms
// of the block-panel macro-kernel with some induced transpositions).
bli_cntx_set_anti_pref( is_pb, cntx );
//bli_cntx_set_anti_pref( is_pb, cntx );
}
else // if ( method == BLIS_NAT )
{

View File

@@ -54,7 +54,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
PASTECH(chr,gemm_ukr_ft) \
rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t row_pref = !col_pref; \
\
const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \

View File

@@ -59,7 +59,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTECH(ch,trsm_ukr_ft) \
ctrsm_vir_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, trsmkerid, cntx ); \
\
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \
const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \

View File

@@ -87,7 +87,7 @@ void blx_gemm_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_eff_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_obj_swap( &a_local, &b_local );

View File

@@ -171,7 +171,7 @@ void PASTECH2(blx_,ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -180,7 +180,7 @@ void PASTECH2(blx_,ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\