diff --git a/frame/1m/bli_l1m_ft.h b/frame/1m/bli_l1m_ft.h index 381f18513..4361c9fac 100644 --- a/frame/1m/bli_l1m_ft.h +++ b/frame/1m/bli_l1m_ft.h @@ -68,6 +68,66 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ INSERT_GENTDEF( packm ) +// NOTE: the following macros generate packm kernel function type definitions +// that are "ctyped" and void-typed, for each of the floating-point datatypes. +// However, we will only make use of the void-typed definitions because the +// functions such as bli_?packm_cxk() (currently) use arrays of function +// pointers to store and access the function pointers for various unrolling +// (register blocksize) values, and therefore they must all be of the same +// type (hence the use of void* for kappa, a, and p). + +// packm_ker + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conja, \ + dim_t n, \ + ctype* restrict kappa, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict p, inc_t ldp \ + ); + +INSERT_GENTDEF( packm_cxk_ker ) + + +// packm_3mis_ker + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conja, \ + dim_t n, \ + ctype* restrict kappa, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict p, inc_t is_p, inc_t ldp \ + ); + +INSERT_GENTDEF( packm_cxk_3mis_ker ) +INSERT_GENTDEF( packm_cxk_4mi_ker ) + + +// packm_rih_ker + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + ctype* restrict kappa, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict p, inc_t ldp \ + ); + +INSERT_GENTDEF( packm_cxk_rih_ker ) + #endif diff --git a/frame/1m/bli_l1m_ker.h b/frame/1m/bli_l1m_ker.h new file mode 100644 index 000000000..794609f44 --- /dev/null +++ b/frame/1m/bli_l1m_ker.h @@ -0,0 +1,141 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + + +// +// Define template prototypes for level-1m kernels. +// + +// native packm kernels + +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ + ); + +INSERT_GENTPROT_BASIC( packm_2xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_3xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_4xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_6xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_8xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_10xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_12xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_14xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_16xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_30xk_ker_name ) + + +// 3mis packm kernels + +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ + ); + +INSERT_GENTPROT_BASIC( packm_2xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_4xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_6xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_8xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_10xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_12xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_14xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_16xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_30xk_3mis_ker_name ) + + +// 4mi packm kernels + +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ + ); + +INSERT_GENTPROT_BASIC( packm_2xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_4xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_6xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_8xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_10xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_12xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_14xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_16xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_30xk_4mi_ker_name ) + + +// rih packm kernels + +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ + ); + +INSERT_GENTPROT_BASIC( packm_2xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_3xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_4xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_6xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_8xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_10xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_12xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_14xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_16xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_30xk_rih_ker_name ) + diff --git a/frame/1m/packm/bli_packm_cxk.c b/frame/1m/packm/bli_packm_cxk.c index c50b06456..3c2ab6fd0 100644 --- a/frame/1m/packm/bli_packm_cxk.c +++ b/frame/1m/packm/bli_packm_cxk.c @@ -34,19 +34,10 @@ #include "blis.h" -#define FUNCPTR_T packm_cxk_fp - -typedef void (*FUNCPTR_T) - ( - conj_t conja, - dim_t panel_len, - void* kappa, - void* a, inc_t inca, inc_t lda, - void* p, inc_t ldp - ); +#define FUNCPTR_T packm_cxk_ker_vft #undef FUNCPTR_ARRAY_LENGTH -#define FUNCPTR_ARRAY_LENGTH 18 +#define FUNCPTR_ARRAY_LENGTH 32 static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = { @@ -149,6 +140,65 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = { NULL, NULL, NULL, NULL, }, + /* micro-panel width = 18 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 19 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 20 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 21 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 22 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 23 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 24 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 25 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 26 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 27 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 28 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 29 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 30 */ + { + BLIS_SPACKM_30XK_KERNEL, + BLIS_CPACKM_30XK_KERNEL, + BLIS_DPACKM_30XK_KERNEL, + BLIS_ZPACKM_30XK_KERNEL, + }, + /* micro-panel width = 31 */ + { + NULL, NULL, NULL, NULL, + }, }; diff --git a/frame/1m/packm/bli_packm_cxk_3mis.c b/frame/1m/packm/bli_packm_cxk_3mis.c index 80c388096..da1ee28ca 100644 --- a/frame/1m/packm/bli_packm_cxk_3mis.c +++ b/frame/1m/packm/bli_packm_cxk_3mis.c @@ -34,16 +34,7 @@ #include "blis.h" -#define FUNCPTR_T packm_cxk_fp - -typedef void (*FUNCPTR_T) - ( - conj_t conja, - dim_t panel_len, - void* kappa, - void* a, inc_t inca, inc_t lda, - void* p, inc_t is_p, inc_t ldp - ); +#define FUNCPTR_T packm_cxk_3mis_ker_vft #undef FUNCPTR_ARRAY_LENGTH #define FUNCPTR_ARRAY_LENGTH 32 diff --git a/frame/1m/packm/bli_packm_cxk_4mi.c b/frame/1m/packm/bli_packm_cxk_4mi.c index c0291d245..2a906fac6 100644 --- a/frame/1m/packm/bli_packm_cxk_4mi.c +++ b/frame/1m/packm/bli_packm_cxk_4mi.c @@ -34,16 +34,7 @@ #include "blis.h" -#define FUNCPTR_T packm_cxk_fp - -typedef void (*FUNCPTR_T) - ( - conj_t conja, - dim_t panel_len, - void* kappa, - void* a, inc_t inca, inc_t lda, - void* p, inc_t is_p, inc_t ldp - ); +#define FUNCPTR_T packm_cxk_4mi_ker_vft #undef FUNCPTR_ARRAY_LENGTH #define FUNCPTR_ARRAY_LENGTH 32 diff --git a/frame/1m/packm/bli_packm_cxk_rih.c b/frame/1m/packm/bli_packm_cxk_rih.c index ec70c08c1..f019eed0b 100644 --- a/frame/1m/packm/bli_packm_cxk_rih.c +++ b/frame/1m/packm/bli_packm_cxk_rih.c @@ -34,17 +34,7 @@ #include "blis.h" -#define FUNCPTR_T packm_cxk_fp - -typedef void (*FUNCPTR_T) - ( - conj_t conja, - pack_t schema, - dim_t panel_len, - void* kappa, - void* a, inc_t inca, inc_t lda, - void* p, inc_t ldp - ); +#define FUNCPTR_T packm_cxk_rih_ker_vft #undef FUNCPTR_ARRAY_LENGTH #define FUNCPTR_ARRAY_LENGTH 32 diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.c b/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.c index dd6a4225e..004acaa60 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.c +++ b/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -131,11 +131,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_2xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -231,11 +231,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_4xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -339,11 +339,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_6xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -455,11 +455,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_8xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -579,11 +579,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_10xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -711,11 +711,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_12xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -851,11 +851,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_14xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -999,11 +999,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_16xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.h b/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.h index 2158bb041..b6811a7b4 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.h +++ b/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.h @@ -32,25 +32,30 @@ */ -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ - ); +// Redefine level-1m kernel API names to induce prototypes. -INSERT_GENTPROT_BASIC( packm_2xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_4xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_6xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_8xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_10xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_12xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_14xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_16xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_30xk_3mis_ref ) +#undef packm_2xk_3mis_ker_name +#define packm_2xk_3mis_ker_name packm_2xk_3mis_ref +#undef packm_3xk_3mis_ker_name +#define packm_3xk_3mis_ker_name packm_3xk_3mis_ref +#undef packm_4xk_3mis_ker_name +#define packm_4xk_3mis_ker_name packm_4xk_3mis_ref +#undef packm_6xk_3mis_ker_name +#define packm_6xk_3mis_ker_name packm_6xk_3mis_ref +#undef packm_8xk_3mis_ker_name +#define packm_8xk_3mis_ker_name packm_8xk_3mis_ref +#undef packm_10xk_3mis_ker_name +#define packm_10xk_3mis_ker_name packm_10xk_3mis_ref +#undef packm_12xk_3mis_ker_name +#define packm_12xk_3mis_ker_name packm_12xk_3mis_ref +#undef packm_14xk_3mis_ker_name +#define packm_14xk_3mis_ker_name packm_14xk_3mis_ref +#undef packm_16xk_3mis_ker_name +#define packm_16xk_3mis_ker_name packm_16xk_3mis_ref +#undef packm_30xk_3mis_ker_name +#define packm_30xk_3mis_ker_name packm_30xk_3mis_ref + +// Include the level-1m kernel API template. + +#include "bli_l1m_ker.h" diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.c b/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.c index 35d2d9662..2279fdeca 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.c +++ b/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -126,11 +126,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_2xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -221,11 +221,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_4xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -324,11 +324,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_6xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -435,11 +435,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_8xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -554,11 +554,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_10xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -681,11 +681,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_12xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -816,11 +816,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_14xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -959,11 +959,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_16xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.h b/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.h index 506da3525..01bb65d7f 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.h +++ b/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.h @@ -32,25 +32,30 @@ */ -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ - ); +// Redefine level-1m kernel API names to induce prototypes. -INSERT_GENTPROT_BASIC( packm_2xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_4xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_6xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_8xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_10xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_12xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_14xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_16xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_30xk_4mi_ref ) +#undef packm_2xk_4mi_ker_name +#define packm_2xk_4mi_ker_name packm_2xk_4mi_ref +#undef packm_3xk_4mi_ker_name +#define packm_3xk_4mi_ker_name packm_3xk_4mi_ref +#undef packm_4xk_4mi_ker_name +#define packm_4xk_4mi_ker_name packm_4xk_4mi_ref +#undef packm_6xk_4mi_ker_name +#define packm_6xk_4mi_ker_name packm_6xk_4mi_ref +#undef packm_8xk_4mi_ker_name +#define packm_8xk_4mi_ker_name packm_8xk_4mi_ref +#undef packm_10xk_4mi_ker_name +#define packm_10xk_4mi_ker_name packm_10xk_4mi_ref +#undef packm_12xk_4mi_ker_name +#define packm_12xk_4mi_ker_name packm_12xk_4mi_ref +#undef packm_14xk_4mi_ker_name +#define packm_14xk_4mi_ker_name packm_14xk_4mi_ref +#undef packm_16xk_4mi_ker_name +#define packm_16xk_4mi_ker_name packm_16xk_4mi_ref +#undef packm_30xk_4mi_ker_name +#define packm_30xk_4mi_ker_name packm_30xk_4mi_ref + +// Include the level-1m kernel API template. + +#include "bli_l1m_ker.h" diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_ref.c b/frame/1m/packm/ukernels/bli_packm_cxk_ref.c index 15abe2878..b33df08cf 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_ref.c +++ b/frame/1m/packm/ukernels/bli_packm_cxk_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -132,11 +132,11 @@ INSERT_GENTFUNC_BASIC0( packm_2xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -233,11 +233,11 @@ INSERT_GENTFUNC_BASIC0( packm_3xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -332,11 +332,11 @@ INSERT_GENTFUNC_BASIC0( packm_4xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -420,11 +420,11 @@ INSERT_GENTFUNC_BASIC0( packm_6xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -543,11 +543,11 @@ INSERT_GENTFUNC_BASIC0( packm_8xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -647,11 +647,11 @@ INSERT_GENTFUNC_BASIC0( packm_10xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -759,11 +759,11 @@ INSERT_GENTFUNC_BASIC0( packm_12xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -879,11 +879,11 @@ INSERT_GENTFUNC_BASIC0( packm_14xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -1007,11 +1007,11 @@ INSERT_GENTFUNC_BASIC0( packm_16xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_ref.h b/frame/1m/packm/ukernels/bli_packm_cxk_ref.h index 3083c2e08..9a55e20ea 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_ref.h +++ b/frame/1m/packm/ukernels/bli_packm_cxk_ref.h @@ -32,26 +32,30 @@ */ -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ - ); +// Redefine level-1m kernel API names to induce prototypes. -INSERT_GENTPROT_BASIC( packm_2xk_ref ) -INSERT_GENTPROT_BASIC( packm_3xk_ref ) -INSERT_GENTPROT_BASIC( packm_4xk_ref ) -INSERT_GENTPROT_BASIC( packm_6xk_ref ) -INSERT_GENTPROT_BASIC( packm_8xk_ref ) -INSERT_GENTPROT_BASIC( packm_10xk_ref ) -INSERT_GENTPROT_BASIC( packm_12xk_ref ) -INSERT_GENTPROT_BASIC( packm_14xk_ref ) -INSERT_GENTPROT_BASIC( packm_16xk_ref ) -INSERT_GENTPROT_BASIC( packm_30xk_ref ) +#undef packm_2xk_ker_name +#define packm_2xk_ker_name packm_2xk_ref +#undef packm_3xk_ker_name +#define packm_3xk_ker_name packm_3xk_ref +#undef packm_4xk_ker_name +#define packm_4xk_ker_name packm_4xk_ref +#undef packm_6xk_ker_name +#define packm_6xk_ker_name packm_6xk_ref +#undef packm_8xk_ker_name +#define packm_8xk_ker_name packm_8xk_ref +#undef packm_10xk_ker_name +#define packm_10xk_ker_name packm_10xk_ref +#undef packm_12xk_ker_name +#define packm_12xk_ker_name packm_12xk_ref +#undef packm_14xk_ker_name +#define packm_14xk_ker_name packm_14xk_ref +#undef packm_16xk_ker_name +#define packm_16xk_ker_name packm_16xk_ref +#undef packm_30xk_ker_name +#define packm_30xk_ker_name packm_30xk_ref + +// Include the level-1m kernel API template. + +#include "bli_l1m_ker.h" diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.c b/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.c index e0bdeb250..3f3634aee 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.c +++ b/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.c @@ -39,12 +39,12 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -219,12 +219,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_2xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -421,12 +421,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_4xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -645,12 +645,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_6xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -891,12 +891,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_8xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -1159,12 +1159,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_10xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -1449,12 +1449,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_12xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -1761,12 +1761,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_14xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -2095,12 +2095,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_16xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.h b/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.h index 70d037e0a..5a465a316 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.h +++ b/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.h @@ -32,26 +32,30 @@ */ -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ - ); +// Redefine level-1m kernel API names to induce prototypes. -INSERT_GENTPROT_BASIC( packm_2xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_4xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_6xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_8xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_10xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_12xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_14xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_16xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_30xk_rih_ref ) +#undef packm_2xk_rih_ker_name +#define packm_2xk_rih_ker_name packm_2xk_rih_ref +#undef packm_3xk_rih_ker_name +#define packm_3xk_rih_ker_name packm_3xk_rih_ref +#undef packm_4xk_rih_ker_name +#define packm_4xk_rih_ker_name packm_4xk_rih_ref +#undef packm_6xk_rih_ker_name +#define packm_6xk_rih_ker_name packm_6xk_rih_ref +#undef packm_8xk_rih_ker_name +#define packm_8xk_rih_ker_name packm_8xk_rih_ref +#undef packm_10xk_rih_ker_name +#define packm_10xk_rih_ker_name packm_10xk_rih_ref +#undef packm_12xk_rih_ker_name +#define packm_12xk_rih_ker_name packm_12xk_rih_ref +#undef packm_14xk_rih_ker_name +#define packm_14xk_rih_ker_name packm_14xk_rih_ref +#undef packm_16xk_rih_ker_name +#define packm_16xk_rih_ker_name packm_16xk_rih_ref +#undef packm_30xk_rih_ker_name +#define packm_30xk_rih_ker_name packm_30xk_rih_ref + +// Include the level-1m kernel API template. + +#include "bli_l1m_ker.h" diff --git a/frame/include/bli_kernel_prototypes.h b/frame/include/bli_kernel_prototypes.h index d3524358c..e693825ff 100644 --- a/frame/include/bli_kernel_prototypes.h +++ b/frame/include/bli_kernel_prototypes.h @@ -68,6 +68,62 @@ #include "bli_l3_ukr.h" +// +// Level-1m +// + +#define bli_spackm_2xk_ker_name BLIS_SPACKM_2XK_KERNEL +#define bli_dpackm_2xk_ker_name BLIS_DPACKM_2XK_KERNEL +#define bli_cpackm_2xk_ker_name BLIS_CPACKM_2XK_KERNEL +#define bli_zpackm_2xk_ker_name BLIS_ZPACKM_2XK_KERNEL + +#define bli_spackm_3xk_ker_name BLIS_SPACKM_3XK_KERNEL +#define bli_dpackm_3xk_ker_name BLIS_DPACKM_3XK_KERNEL +#define bli_cpackm_3xk_ker_name BLIS_CPACKM_3XK_KERNEL +#define bli_zpackm_3xk_ker_name BLIS_ZPACKM_3XK_KERNEL + +#define bli_spackm_4xk_ker_name BLIS_SPACKM_4XK_KERNEL +#define bli_dpackm_4xk_ker_name BLIS_DPACKM_4XK_KERNEL +#define bli_cpackm_4xk_ker_name BLIS_CPACKM_4XK_KERNEL +#define bli_zpackm_4xk_ker_name BLIS_ZPACKM_4XK_KERNEL + +#define bli_spackm_6xk_ker_name BLIS_SPACKM_6XK_KERNEL +#define bli_dpackm_6xk_ker_name BLIS_DPACKM_6XK_KERNEL +#define bli_cpackm_6xk_ker_name BLIS_CPACKM_6XK_KERNEL +#define bli_zpackm_6xk_ker_name BLIS_ZPACKM_6XK_KERNEL + +#define bli_spackm_8xk_ker_name BLIS_SPACKM_8XK_KERNEL +#define bli_dpackm_8xk_ker_name BLIS_DPACKM_8XK_KERNEL +#define bli_cpackm_8xk_ker_name BLIS_CPACKM_8XK_KERNEL +#define bli_zpackm_8xk_ker_name BLIS_ZPACKM_8XK_KERNEL + +#define bli_spackm_10xk_ker_name BLIS_SPACKM_10XK_KERNEL +#define bli_dpackm_10xk_ker_name BLIS_DPACKM_10XK_KERNEL +#define bli_cpackm_10xk_ker_name BLIS_CPACKM_10XK_KERNEL +#define bli_zpackm_10xk_ker_name BLIS_ZPACKM_10XK_KERNEL + +#define bli_spackm_12xk_ker_name BLIS_SPACKM_12XK_KERNEL +#define bli_dpackm_12xk_ker_name BLIS_DPACKM_12XK_KERNEL +#define bli_cpackm_12xk_ker_name BLIS_CPACKM_12XK_KERNEL +#define bli_zpackm_12xk_ker_name BLIS_ZPACKM_12XK_KERNEL + +#define bli_spackm_14xk_ker_name BLIS_SPACKM_14XK_KERNEL +#define bli_dpackm_14xk_ker_name BLIS_DPACKM_14XK_KERNEL +#define bli_cpackm_14xk_ker_name BLIS_CPACKM_14XK_KERNEL +#define bli_zpackm_14xk_ker_name BLIS_ZPACKM_14XK_KERNEL + +#define bli_spackm_16xk_ker_name BLIS_SPACKM_16XK_KERNEL +#define bli_dpackm_16xk_ker_name BLIS_DPACKM_16XK_KERNEL +#define bli_cpackm_16xk_ker_name BLIS_CPACKM_16XK_KERNEL +#define bli_zpackm_16xk_ker_name BLIS_ZPACKM_16XK_KERNEL + +#define bli_spackm_30xk_ker_name BLIS_SPACKM_30XK_KERNEL +#define bli_dpackm_30xk_ker_name BLIS_DPACKM_30XK_KERNEL +#define bli_cpackm_30xk_ker_name BLIS_CPACKM_30XK_KERNEL +#define bli_zpackm_30xk_ker_name BLIS_ZPACKM_30XK_KERNEL + +#include "bli_l1m_ker.h" + // // Level-1f //