mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Added new kernel blocksize macro aliases.
Details:
- Added new macros that alias level-3 cache and register blocksize macros to names that can be constructed via the PASTEMAC macro. These aliased macro definitions live inside bli_kernel_macro_defs.h, which is now #included after bli_kernel.h.
- Modified macro-kernels to use new aliased blocksize macros instead of operation-specific ones.
- Removed local, operation-specific kernel blocksize macro definitions (found in macro-kernel header files).
This commit is contained in:
@@ -144,21 +144,21 @@ void PASTEMAC(ch,varname)( \
|
||||
) \
|
||||
{ \
|
||||
/* Temporary buffer for duplicating elements of B. */ \
|
||||
ctype bd[ PASTEMAC2(ch,varname,_kc) * \
|
||||
PASTEMAC2(ch,varname,_nr) * \
|
||||
PASTEMAC2(ch,varname,_ndup) ]; \
|
||||
ctype bd[ PASTEMAC(ch,kc) * \
|
||||
PASTEMAC(ch,nr) * \
|
||||
PASTEMAC(ch,ndup) ]; \
|
||||
ctype* restrict bp; \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. */ \
|
||||
ctype ct[ PASTEMAC2(ch,varname,_mr) * \
|
||||
PASTEMAC2(ch,varname,_nr) ]; \
|
||||
ctype ct[ PASTEMAC(ch,mr) * \
|
||||
PASTEMAC(ch,nr) ]; \
|
||||
const inc_t rs_ct = 1; \
|
||||
const inc_t cs_ct = PASTEMAC2(ch,varname,_mr); \
|
||||
const inc_t cs_ct = PASTEMAC(ch,mr); \
|
||||
\
|
||||
/* Alias some constants to shorter names. */ \
|
||||
const dim_t MR = PASTEMAC2(ch,varname,_mr); \
|
||||
const dim_t NR = PASTEMAC2(ch,varname,_nr); \
|
||||
const dim_t NDUP = PASTEMAC2(ch,varname,_ndup); \
|
||||
const dim_t MR = PASTEMAC(ch,mr); \
|
||||
const dim_t NR = PASTEMAC(ch,nr); \
|
||||
const dim_t NDUP = PASTEMAC(ch,ndup); \
|
||||
const bool_t DUPB = NDUP != 1; \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
|
||||
@@ -33,34 +33,6 @@
|
||||
*/
|
||||
|
||||
|
||||
//
|
||||
// Define macro-kernel blocksizes.
|
||||
//
|
||||
// NOTE: These MR and NR values below MUST match the values that packm uses
|
||||
// when initializing its control tree node.
|
||||
//
|
||||
|
||||
#define bli_sgemm_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_S
|
||||
#define bli_sgemm_ker_var2_kc BLIS_DEFAULT_KC_S
|
||||
#define bli_sgemm_ker_var2_mr BLIS_DEFAULT_MR_S
|
||||
#define bli_sgemm_ker_var2_nr BLIS_DEFAULT_NR_S
|
||||
|
||||
#define bli_dgemm_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_D
|
||||
#define bli_dgemm_ker_var2_kc BLIS_DEFAULT_KC_D
|
||||
#define bli_dgemm_ker_var2_mr BLIS_DEFAULT_MR_D
|
||||
#define bli_dgemm_ker_var2_nr BLIS_DEFAULT_NR_D
|
||||
|
||||
#define bli_cgemm_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_C
|
||||
#define bli_cgemm_ker_var2_kc BLIS_DEFAULT_KC_C
|
||||
#define bli_cgemm_ker_var2_mr BLIS_DEFAULT_MR_C
|
||||
#define bli_cgemm_ker_var2_nr BLIS_DEFAULT_NR_C
|
||||
|
||||
#define bli_zgemm_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_Z
|
||||
#define bli_zgemm_ker_var2_kc BLIS_DEFAULT_KC_Z
|
||||
#define bli_zgemm_ker_var2_mr BLIS_DEFAULT_MR_Z
|
||||
#define bli_zgemm_ker_var2_nr BLIS_DEFAULT_NR_Z
|
||||
|
||||
|
||||
//
|
||||
// Prototype object-based interface.
|
||||
//
|
||||
|
||||
@@ -148,21 +148,21 @@ void PASTEMAC(ch,varname)( \
|
||||
) \
|
||||
{ \
|
||||
/* Temporary buffer for duplicating elements of B. */ \
|
||||
ctype bd[ PASTEMAC2(ch,varname,_kc) * \
|
||||
PASTEMAC2(ch,varname,_nr) * \
|
||||
PASTEMAC2(ch,varname,_ndup) ]; \
|
||||
ctype bd[ PASTEMAC(ch,kc) * \
|
||||
PASTEMAC(ch,nr) * \
|
||||
PASTEMAC(ch,ndup) ]; \
|
||||
ctype* restrict bp; \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. */ \
|
||||
ctype ct[ PASTEMAC2(ch,varname,_mr) * \
|
||||
PASTEMAC2(ch,varname,_nr) ]; \
|
||||
ctype ct[ PASTEMAC(ch,mr) * \
|
||||
PASTEMAC(ch,nr) ]; \
|
||||
const inc_t rs_ct = 1; \
|
||||
const inc_t cs_ct = PASTEMAC2(ch,varname,_mr); \
|
||||
const inc_t cs_ct = PASTEMAC(ch,mr); \
|
||||
\
|
||||
/* Alias some constants to shorter names. */ \
|
||||
const dim_t MR = PASTEMAC2(ch,varname,_mr); \
|
||||
const dim_t NR = PASTEMAC2(ch,varname,_nr); \
|
||||
const bool_t NDUP = PASTEMAC2(ch,varname,_ndup); \
|
||||
const dim_t MR = PASTEMAC(ch,mr); \
|
||||
const dim_t NR = PASTEMAC(ch,nr); \
|
||||
const bool_t NDUP = PASTEMAC(ch,ndup); \
|
||||
const bool_t DUPB = NDUP != 1; \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
|
||||
@@ -33,34 +33,6 @@
|
||||
*/
|
||||
|
||||
|
||||
//
|
||||
// Define macro-kernel blocksizes.
|
||||
//
|
||||
// NOTE: These MR and NR values below MUST match the values that packm uses
|
||||
// when initializing its control tree node.
|
||||
//
|
||||
|
||||
#define bli_sherk_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_S
|
||||
#define bli_sherk_l_ker_var2_kc BLIS_DEFAULT_KC_S
|
||||
#define bli_sherk_l_ker_var2_mr BLIS_DEFAULT_MR_S
|
||||
#define bli_sherk_l_ker_var2_nr BLIS_DEFAULT_NR_S
|
||||
|
||||
#define bli_dherk_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_D
|
||||
#define bli_dherk_l_ker_var2_kc BLIS_DEFAULT_KC_D
|
||||
#define bli_dherk_l_ker_var2_mr BLIS_DEFAULT_MR_D
|
||||
#define bli_dherk_l_ker_var2_nr BLIS_DEFAULT_NR_D
|
||||
|
||||
#define bli_cherk_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_C
|
||||
#define bli_cherk_l_ker_var2_kc BLIS_DEFAULT_KC_C
|
||||
#define bli_cherk_l_ker_var2_mr BLIS_DEFAULT_MR_C
|
||||
#define bli_cherk_l_ker_var2_nr BLIS_DEFAULT_NR_C
|
||||
|
||||
#define bli_zherk_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_Z
|
||||
#define bli_zherk_l_ker_var2_kc BLIS_DEFAULT_KC_Z
|
||||
#define bli_zherk_l_ker_var2_mr BLIS_DEFAULT_MR_Z
|
||||
#define bli_zherk_l_ker_var2_nr BLIS_DEFAULT_NR_Z
|
||||
|
||||
|
||||
//
|
||||
// Prototype object-based interface.
|
||||
//
|
||||
|
||||
@@ -148,21 +148,21 @@ void PASTEMAC(ch,varname)( \
|
||||
) \
|
||||
{ \
|
||||
/* Temporary buffer for duplicating elements of B. */ \
|
||||
ctype bd[ PASTEMAC2(ch,varname,_kc) * \
|
||||
PASTEMAC2(ch,varname,_nr) * \
|
||||
PASTEMAC2(ch,varname,_ndup) ]; \
|
||||
ctype bd[ PASTEMAC(ch,kc) * \
|
||||
PASTEMAC(ch,nr) * \
|
||||
PASTEMAC(ch,ndup) ]; \
|
||||
ctype* restrict bp; \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. */ \
|
||||
ctype ct[ PASTEMAC2(ch,varname,_mr) * \
|
||||
PASTEMAC2(ch,varname,_nr) ]; \
|
||||
ctype ct[ PASTEMAC(ch,mr) * \
|
||||
PASTEMAC(ch,nr) ]; \
|
||||
const inc_t rs_ct = 1; \
|
||||
const inc_t cs_ct = PASTEMAC2(ch,varname,_mr); \
|
||||
const inc_t cs_ct = PASTEMAC(ch,mr); \
|
||||
\
|
||||
/* Alias some constants to shorter names. */ \
|
||||
const dim_t MR = PASTEMAC2(ch,varname,_mr); \
|
||||
const dim_t NR = PASTEMAC2(ch,varname,_nr); \
|
||||
const bool_t NDUP = PASTEMAC2(ch,varname,_ndup); \
|
||||
const dim_t MR = PASTEMAC(ch,mr); \
|
||||
const dim_t NR = PASTEMAC(ch,nr); \
|
||||
const bool_t NDUP = PASTEMAC(ch,ndup); \
|
||||
const bool_t DUPB = NDUP != 1; \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
|
||||
@@ -33,34 +33,6 @@
|
||||
*/
|
||||
|
||||
|
||||
//
|
||||
// Define macro-kernel blocksizes.
|
||||
//
|
||||
// NOTE: These MR and NR values below MUST match the values that packm uses
|
||||
// when initializing its control tree node.
|
||||
//
|
||||
|
||||
#define bli_sherk_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_S
|
||||
#define bli_sherk_u_ker_var2_kc BLIS_DEFAULT_KC_S
|
||||
#define bli_sherk_u_ker_var2_mr BLIS_DEFAULT_MR_S
|
||||
#define bli_sherk_u_ker_var2_nr BLIS_DEFAULT_NR_S
|
||||
|
||||
#define bli_dherk_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_D
|
||||
#define bli_dherk_u_ker_var2_kc BLIS_DEFAULT_KC_D
|
||||
#define bli_dherk_u_ker_var2_mr BLIS_DEFAULT_MR_D
|
||||
#define bli_dherk_u_ker_var2_nr BLIS_DEFAULT_NR_D
|
||||
|
||||
#define bli_cherk_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_C
|
||||
#define bli_cherk_u_ker_var2_kc BLIS_DEFAULT_KC_C
|
||||
#define bli_cherk_u_ker_var2_mr BLIS_DEFAULT_MR_C
|
||||
#define bli_cherk_u_ker_var2_nr BLIS_DEFAULT_NR_C
|
||||
|
||||
#define bli_zherk_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_Z
|
||||
#define bli_zherk_u_ker_var2_kc BLIS_DEFAULT_KC_Z
|
||||
#define bli_zherk_u_ker_var2_mr BLIS_DEFAULT_MR_Z
|
||||
#define bli_zherk_u_ker_var2_nr BLIS_DEFAULT_NR_Z
|
||||
|
||||
|
||||
//
|
||||
// Prototype object-based interface.
|
||||
//
|
||||
|
||||
@@ -149,21 +149,21 @@ void PASTEMAC(ch,varname)( \
|
||||
) \
|
||||
{ \
|
||||
/* Temporary buffer for duplicating elements of B. */ \
|
||||
ctype bd[ PASTEMAC2(ch,varname,_kc) * \
|
||||
PASTEMAC2(ch,varname,_nr) * \
|
||||
PASTEMAC2(ch,varname,_ndup) ]; \
|
||||
ctype bd[ PASTEMAC(ch,kc) * \
|
||||
PASTEMAC(ch,nr) * \
|
||||
PASTEMAC(ch,ndup) ]; \
|
||||
ctype* restrict bp; \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. */ \
|
||||
ctype ct[ PASTEMAC2(ch,varname,_mr) * \
|
||||
PASTEMAC2(ch,varname,_nr) ]; \
|
||||
ctype ct[ PASTEMAC(ch,mr) * \
|
||||
PASTEMAC(ch,nr) ]; \
|
||||
const inc_t rs_ct = 1; \
|
||||
const inc_t cs_ct = PASTEMAC2(ch,varname,_mr); \
|
||||
const inc_t cs_ct = PASTEMAC(ch,mr); \
|
||||
\
|
||||
/* Alias some constants to shorter names. */ \
|
||||
const dim_t MR = PASTEMAC2(ch,varname,_mr); \
|
||||
const dim_t NR = PASTEMAC2(ch,varname,_nr); \
|
||||
const dim_t NDUP = PASTEMAC2(ch,varname,_ndup); \
|
||||
const dim_t MR = PASTEMAC(ch,mr); \
|
||||
const dim_t NR = PASTEMAC(ch,nr); \
|
||||
const dim_t NDUP = PASTEMAC(ch,ndup); \
|
||||
const bool_t DUPB = NDUP != 1; \
|
||||
\
|
||||
ctype* restrict one = PASTEMAC(ch,1); \
|
||||
|
||||
@@ -33,34 +33,6 @@
|
||||
*/
|
||||
|
||||
|
||||
//
|
||||
// Define macro-kernel blocksizes.
|
||||
//
|
||||
// NOTE: These MR and NR values below MUST match the values that packm uses
|
||||
// when initializing its control tree node.
|
||||
//
|
||||
|
||||
#define bli_strmm_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_S
|
||||
#define bli_strmm_l_ker_var2_kc BLIS_DEFAULT_KC_S
|
||||
#define bli_strmm_l_ker_var2_mr BLIS_DEFAULT_MR_S
|
||||
#define bli_strmm_l_ker_var2_nr BLIS_DEFAULT_NR_S
|
||||
|
||||
#define bli_dtrmm_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_D
|
||||
#define bli_dtrmm_l_ker_var2_kc BLIS_DEFAULT_KC_D
|
||||
#define bli_dtrmm_l_ker_var2_mr BLIS_DEFAULT_MR_D
|
||||
#define bli_dtrmm_l_ker_var2_nr BLIS_DEFAULT_NR_D
|
||||
|
||||
#define bli_ctrmm_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_C
|
||||
#define bli_ctrmm_l_ker_var2_kc BLIS_DEFAULT_KC_C
|
||||
#define bli_ctrmm_l_ker_var2_mr BLIS_DEFAULT_MR_C
|
||||
#define bli_ctrmm_l_ker_var2_nr BLIS_DEFAULT_NR_C
|
||||
|
||||
#define bli_ztrmm_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_Z
|
||||
#define bli_ztrmm_l_ker_var2_kc BLIS_DEFAULT_KC_Z
|
||||
#define bli_ztrmm_l_ker_var2_mr BLIS_DEFAULT_MR_Z
|
||||
#define bli_ztrmm_l_ker_var2_nr BLIS_DEFAULT_NR_Z
|
||||
|
||||
|
||||
//
|
||||
// Prototype object-based interface.
|
||||
//
|
||||
|
||||
@@ -149,21 +149,21 @@ void PASTEMAC(ch,varname)( \
|
||||
) \
|
||||
{ \
|
||||
/* Temporary buffer for duplicating elements of B. */ \
|
||||
ctype bd[ PASTEMAC2(ch,varname,_kc) * \
|
||||
PASTEMAC2(ch,varname,_nr) * \
|
||||
PASTEMAC2(ch,varname,_ndup) ]; \
|
||||
ctype bd[ PASTEMAC(ch,kc) * \
|
||||
PASTEMAC(ch,nr) * \
|
||||
PASTEMAC(ch,ndup) ]; \
|
||||
ctype* restrict bp; \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. */ \
|
||||
ctype ct[ PASTEMAC2(ch,varname,_mr) * \
|
||||
PASTEMAC2(ch,varname,_nr) ]; \
|
||||
ctype ct[ PASTEMAC(ch,mr) * \
|
||||
PASTEMAC(ch,nr) ]; \
|
||||
const inc_t rs_ct = 1; \
|
||||
const inc_t cs_ct = PASTEMAC2(ch,varname,_mr); \
|
||||
const inc_t cs_ct = PASTEMAC(ch,mr); \
|
||||
\
|
||||
/* Alias some constants to shorter names. */ \
|
||||
const dim_t MR = PASTEMAC2(ch,varname,_mr); \
|
||||
const dim_t NR = PASTEMAC2(ch,varname,_nr); \
|
||||
const dim_t NDUP = PASTEMAC2(ch,varname,_ndup); \
|
||||
const dim_t MR = PASTEMAC(ch,mr); \
|
||||
const dim_t NR = PASTEMAC(ch,nr); \
|
||||
const dim_t NDUP = PASTEMAC(ch,ndup); \
|
||||
const bool_t DUPB = NDUP != 1; \
|
||||
\
|
||||
ctype* restrict one = PASTEMAC(ch,1); \
|
||||
|
||||
@@ -33,34 +33,6 @@
|
||||
*/
|
||||
|
||||
|
||||
//
|
||||
// Define macro-kernel blocksizes.
|
||||
//
|
||||
// NOTE: These MR and NR values below MUST match the values that packm uses
|
||||
// when initializing its control tree node.
|
||||
//
|
||||
|
||||
#define bli_strmm_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_S
|
||||
#define bli_strmm_u_ker_var2_kc BLIS_DEFAULT_KC_S
|
||||
#define bli_strmm_u_ker_var2_mr BLIS_DEFAULT_MR_S
|
||||
#define bli_strmm_u_ker_var2_nr BLIS_DEFAULT_NR_S
|
||||
|
||||
#define bli_dtrmm_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_D
|
||||
#define bli_dtrmm_u_ker_var2_kc BLIS_DEFAULT_KC_D
|
||||
#define bli_dtrmm_u_ker_var2_mr BLIS_DEFAULT_MR_D
|
||||
#define bli_dtrmm_u_ker_var2_nr BLIS_DEFAULT_NR_D
|
||||
|
||||
#define bli_ctrmm_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_C
|
||||
#define bli_ctrmm_u_ker_var2_kc BLIS_DEFAULT_KC_C
|
||||
#define bli_ctrmm_u_ker_var2_mr BLIS_DEFAULT_MR_C
|
||||
#define bli_ctrmm_u_ker_var2_nr BLIS_DEFAULT_NR_C
|
||||
|
||||
#define bli_ztrmm_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_Z
|
||||
#define bli_ztrmm_u_ker_var2_kc BLIS_DEFAULT_KC_Z
|
||||
#define bli_ztrmm_u_ker_var2_mr BLIS_DEFAULT_MR_Z
|
||||
#define bli_ztrmm_u_ker_var2_nr BLIS_DEFAULT_NR_Z
|
||||
|
||||
|
||||
//
|
||||
// Prototype object-based interface.
|
||||
//
|
||||
|
||||
@@ -138,21 +138,21 @@ void PASTEMAC(ch,varname)( \
|
||||
) \
|
||||
{ \
|
||||
/* Temporary buffer for duplicating elements of B. */ \
|
||||
ctype bd[ PASTEMAC2(ch,varname,_kc) * \
|
||||
PASTEMAC2(ch,varname,_nr) * \
|
||||
PASTEMAC2(ch,varname,_ndup) ]; \
|
||||
ctype bd[ PASTEMAC(ch,kc) * \
|
||||
PASTEMAC(ch,nr) * \
|
||||
PASTEMAC(ch,ndup) ]; \
|
||||
ctype* restrict bp; \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. */ \
|
||||
ctype ct[ PASTEMAC2(ch,varname,_mr) * \
|
||||
PASTEMAC2(ch,varname,_nr) ]; \
|
||||
ctype ct[ PASTEMAC(ch,mr) * \
|
||||
PASTEMAC(ch,nr) ]; \
|
||||
const inc_t rs_ct = 1; \
|
||||
const inc_t cs_ct = PASTEMAC2(ch,varname,_mr); \
|
||||
const inc_t cs_ct = PASTEMAC(ch,mr); \
|
||||
\
|
||||
/* Alias constants to shorter names. */ \
|
||||
const dim_t MR = PASTEMAC2(ch,varname,_mr); \
|
||||
const dim_t NR = PASTEMAC2(ch,varname,_nr); \
|
||||
const dim_t NDUP = PASTEMAC2(ch,varname,_ndup); \
|
||||
const dim_t MR = PASTEMAC(ch,mr); \
|
||||
const dim_t NR = PASTEMAC(ch,nr); \
|
||||
const dim_t NDUP = PASTEMAC(ch,ndup); \
|
||||
const bool_t DUPB = NDUP != 1; \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
|
||||
@@ -33,34 +33,6 @@
|
||||
*/
|
||||
|
||||
|
||||
//
|
||||
// Define macro-kernel blocksizes.
|
||||
//
|
||||
// NOTE: These MR and NR values below MUST match the values that packm uses
|
||||
// when initializing its control tree node.
|
||||
//
|
||||
|
||||
#define bli_strsm_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_S
|
||||
#define bli_strsm_l_ker_var2_kc BLIS_DEFAULT_KC_S
|
||||
#define bli_strsm_l_ker_var2_mr BLIS_DEFAULT_MR_S
|
||||
#define bli_strsm_l_ker_var2_nr BLIS_DEFAULT_NR_S
|
||||
|
||||
#define bli_dtrsm_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_D
|
||||
#define bli_dtrsm_l_ker_var2_kc BLIS_DEFAULT_KC_D
|
||||
#define bli_dtrsm_l_ker_var2_mr BLIS_DEFAULT_MR_D
|
||||
#define bli_dtrsm_l_ker_var2_nr BLIS_DEFAULT_NR_D
|
||||
|
||||
#define bli_ctrsm_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_C
|
||||
#define bli_ctrsm_l_ker_var2_kc BLIS_DEFAULT_KC_C
|
||||
#define bli_ctrsm_l_ker_var2_mr BLIS_DEFAULT_MR_C
|
||||
#define bli_ctrsm_l_ker_var2_nr BLIS_DEFAULT_NR_C
|
||||
|
||||
#define bli_ztrsm_l_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_Z
|
||||
#define bli_ztrsm_l_ker_var2_kc BLIS_DEFAULT_KC_Z
|
||||
#define bli_ztrsm_l_ker_var2_mr BLIS_DEFAULT_MR_Z
|
||||
#define bli_ztrsm_l_ker_var2_nr BLIS_DEFAULT_NR_Z
|
||||
|
||||
|
||||
//
|
||||
// Prototype object-based interface.
|
||||
//
|
||||
|
||||
@@ -138,21 +138,21 @@ void PASTEMAC(ch,varname)( \
|
||||
) \
|
||||
{ \
|
||||
/* Temporary buffer for duplicating elements of B. */ \
|
||||
ctype bd[ PASTEMAC2(ch,varname,_kc) * \
|
||||
PASTEMAC2(ch,varname,_nr) * \
|
||||
PASTEMAC2(ch,varname,_ndup) ]; \
|
||||
ctype bd[ PASTEMAC(ch,kc) * \
|
||||
PASTEMAC(ch,nr) * \
|
||||
PASTEMAC(ch,ndup) ]; \
|
||||
ctype* restrict bp; \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. */ \
|
||||
ctype ct[ PASTEMAC2(ch,varname,_mr) * \
|
||||
PASTEMAC2(ch,varname,_nr) ]; \
|
||||
ctype ct[ PASTEMAC(ch,mr) * \
|
||||
PASTEMAC(ch,nr) ]; \
|
||||
const inc_t rs_ct = 1; \
|
||||
const inc_t cs_ct = PASTEMAC2(ch,varname,_mr); \
|
||||
const inc_t cs_ct = PASTEMAC(ch,mr); \
|
||||
\
|
||||
/* Alias constants to shorter names. */ \
|
||||
const dim_t MR = PASTEMAC2(ch,varname,_mr); \
|
||||
const dim_t NR = PASTEMAC2(ch,varname,_nr); \
|
||||
const dim_t NDUP = PASTEMAC2(ch,varname,_ndup); \
|
||||
const dim_t MR = PASTEMAC(ch,mr); \
|
||||
const dim_t NR = PASTEMAC(ch,nr); \
|
||||
const dim_t NDUP = PASTEMAC(ch,ndup); \
|
||||
const bool_t DUPB = NDUP != 1; \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
|
||||
@@ -33,34 +33,6 @@
|
||||
*/
|
||||
|
||||
|
||||
//
|
||||
// Define macro-kernel blocksizes.
|
||||
//
|
||||
// NOTE: These MR and NR values below MUST match the values that packm uses
|
||||
// when initializing its control tree node.
|
||||
//
|
||||
|
||||
#define bli_strsm_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_S
|
||||
#define bli_strsm_u_ker_var2_kc BLIS_DEFAULT_KC_S
|
||||
#define bli_strsm_u_ker_var2_mr BLIS_DEFAULT_MR_S
|
||||
#define bli_strsm_u_ker_var2_nr BLIS_DEFAULT_NR_S
|
||||
|
||||
#define bli_dtrsm_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_D
|
||||
#define bli_dtrsm_u_ker_var2_kc BLIS_DEFAULT_KC_D
|
||||
#define bli_dtrsm_u_ker_var2_mr BLIS_DEFAULT_MR_D
|
||||
#define bli_dtrsm_u_ker_var2_nr BLIS_DEFAULT_NR_D
|
||||
|
||||
#define bli_ctrsm_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_C
|
||||
#define bli_ctrsm_u_ker_var2_kc BLIS_DEFAULT_KC_C
|
||||
#define bli_ctrsm_u_ker_var2_mr BLIS_DEFAULT_MR_C
|
||||
#define bli_ctrsm_u_ker_var2_nr BLIS_DEFAULT_NR_C
|
||||
|
||||
#define bli_ztrsm_u_ker_var2_ndup BLIS_DEFAULT_NUM_DUPL_Z
|
||||
#define bli_ztrsm_u_ker_var2_kc BLIS_DEFAULT_KC_Z
|
||||
#define bli_ztrsm_u_ker_var2_mr BLIS_DEFAULT_MR_Z
|
||||
#define bli_ztrsm_u_ker_var2_nr BLIS_DEFAULT_NR_Z
|
||||
|
||||
|
||||
//
|
||||
// Prototype object-based interface.
|
||||
//
|
||||
|
||||
86
frame/include/bli_kernel_macro_defs.h
Normal file
86
frame/include/bli_kernel_macro_defs.h
Normal file
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2013, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_KERNEL_MACRO_DEFS_H
|
||||
#define BLIS_KERNEL_MACRO_DEFS_H
|
||||
|
||||
|
||||
// Alias the kernel blocksizes defined in bli_kernel.h to shorter
|
||||
// names that can be constructed via the PASTEMAC macro.
|
||||
|
||||
// Cache blocksizes
|
||||
|
||||
#define bli_smc BLIS_DEFAULT_MC_S
|
||||
#define bli_snc BLIS_DEFAULT_NC_S
|
||||
#define bli_skc BLIS_DEFAULT_KC_S
|
||||
|
||||
#define bli_dmc BLIS_DEFAULT_MC_D
|
||||
#define bli_dnc BLIS_DEFAULT_NC_D
|
||||
#define bli_dkc BLIS_DEFAULT_KC_D
|
||||
|
||||
#define bli_cmc BLIS_DEFAULT_MC_C
|
||||
#define bli_cnc BLIS_DEFAULT_NC_C
|
||||
#define bli_ckc BLIS_DEFAULT_KC_C
|
||||
|
||||
#define bli_zmc BLIS_DEFAULT_MC_Z
|
||||
#define bli_znc BLIS_DEFAULT_NC_Z
|
||||
#define bli_zkc BLIS_DEFAULT_KC_Z
|
||||
|
||||
// Register blocksizes
|
||||
|
||||
#define bli_smr BLIS_DEFAULT_MR_S
|
||||
#define bli_snr BLIS_DEFAULT_NR_S
|
||||
#define bli_skr BLIS_DEFAULT_KR_S
|
||||
|
||||
#define bli_dmr BLIS_DEFAULT_MR_D
|
||||
#define bli_dnr BLIS_DEFAULT_NR_D
|
||||
#define bli_dkr BLIS_DEFAULT_KR_D
|
||||
|
||||
#define bli_cmr BLIS_DEFAULT_MR_C
|
||||
#define bli_cnr BLIS_DEFAULT_NR_C
|
||||
#define bli_ckr BLIS_DEFAULT_KR_C
|
||||
|
||||
#define bli_zmr BLIS_DEFAULT_MR_Z
|
||||
#define bli_znr BLIS_DEFAULT_NR_Z
|
||||
#define bli_zkr BLIS_DEFAULT_KR_Z
|
||||
|
||||
// Duplication
|
||||
|
||||
#define bli_sndup BLIS_DEFAULT_NUM_DUPL_S
|
||||
#define bli_dndup BLIS_DEFAULT_NUM_DUPL_D
|
||||
#define bli_cndup BLIS_DEFAULT_NUM_DUPL_C
|
||||
#define bli_zndup BLIS_DEFAULT_NUM_DUPL_Z
|
||||
|
||||
|
||||
#endif
|
||||
@@ -66,6 +66,7 @@
|
||||
#include "bli_gentfunc_macro_defs.h"
|
||||
#include "bli_gentprot_macro_defs.h"
|
||||
|
||||
#include "bli_kernel_macro_defs.h"
|
||||
#include "bli_mem_macro_defs.h"
|
||||
#include "bli_pool_macro_defs.h"
|
||||
#include "bli_obj_macro_defs.h"
|
||||
|
||||
@@ -64,6 +64,11 @@ extern "C" {
|
||||
#include <time.h>
|
||||
|
||||
|
||||
// -- BLIS kernel definitions --
|
||||
|
||||
#include "bli_kernel.h"
|
||||
|
||||
|
||||
// -- BLIS definitions --
|
||||
|
||||
#include "bli_type_defs.h"
|
||||
@@ -71,11 +76,6 @@ extern "C" {
|
||||
#include "bli_extern_defs.h"
|
||||
|
||||
|
||||
// -- BLIS kernel definitions --
|
||||
|
||||
#include "bli_kernel.h"
|
||||
|
||||
|
||||
// -- Base operation prototypes --
|
||||
|
||||
#include "bli_init.h"
|
||||
|
||||
Reference in New Issue
Block a user