From 32db0adc218ea4ae370164dbe8d23b41cd3526d3 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Tue, 17 May 2016 15:20:16 -0500 Subject: [PATCH] Generate prototypes for user-defined packm kernels. Details: - Created template prototypes for packm kernels (in bli_l1m_ker.h), and then redefined reference packm kernels' prototyping headers in terms of this template, as is already done for level-1v, -1f, and -3 kernels. - Automatically generate prototypes for user-defined packm kernels in bli_kernel_prototypes.h (using the new template prototypes in bli_l1m_ker.h). - Defined packm kernel function types in bli_l1m_ft.h, including for packm kernels specific to induced methods, which are now used in bli_packm_cxk.c and friends rather than using a locally-defined function type. - In bli_packm_cxk.c, extended the array of packm kernel function pointers out to index 31 (from previous maximum of 17). This allows us to store the unrolled 30xk kernel in the array for use (on knc, for example). Note: This should have been done a long time ago. 
--- frame/1m/bli_l1m_ft.h | 60 ++++++++ frame/1m/bli_l1m_ker.h | 141 ++++++++++++++++++ frame/1m/packm/bli_packm_cxk.c | 72 +++++++-- frame/1m/packm/bli_packm_cxk_3mis.c | 11 +- frame/1m/packm/bli_packm_cxk_4mi.c | 11 +- frame/1m/packm/bli_packm_cxk_rih.c | 12 +- .../packm/ukernels/bli_packm_cxk_3mis_ref.c | 90 +++++------ .../packm/ukernels/bli_packm_cxk_3mis_ref.h | 45 +++--- .../1m/packm/ukernels/bli_packm_cxk_4mi_ref.c | 90 +++++------ .../1m/packm/ukernels/bli_packm_cxk_4mi_ref.h | 45 +++--- frame/1m/packm/ukernels/bli_packm_cxk_ref.c | 100 ++++++------- frame/1m/packm/ukernels/bli_packm_cxk_ref.h | 46 +++--- .../1m/packm/ukernels/bli_packm_cxk_rih_ref.c | 108 +++++++------- .../1m/packm/ukernels/bli_packm_cxk_rih_ref.h | 46 +++--- frame/include/bli_kernel_prototypes.h | 56 +++++++ 15 files changed, 615 insertions(+), 318 deletions(-) create mode 100644 frame/1m/bli_l1m_ker.h diff --git a/frame/1m/bli_l1m_ft.h b/frame/1m/bli_l1m_ft.h index 381f18513..4361c9fac 100644 --- a/frame/1m/bli_l1m_ft.h +++ b/frame/1m/bli_l1m_ft.h @@ -68,6 +68,66 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ INSERT_GENTDEF( packm ) +// NOTE: the following macros generate packm kernel function type definitions +// that are "ctyped" and void-typed, for each of the floating-point datatypes. +// However, we will only make use of the void-typed definitions because the +// functions such as bli_?packm_cxk() (currently) use arrays of function +// pointers to store and access the function pointers for various unrolling +// (register blocksize) values, and therefore they must all be of the same +// type (hence the use of void* for kappa, a, and p). 
+ +// packm_ker + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conja, \ + dim_t n, \ + ctype* restrict kappa, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict p, inc_t ldp \ + ); + +INSERT_GENTDEF( packm_cxk_ker ) + + +// packm_3mis_ker + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conja, \ + dim_t n, \ + ctype* restrict kappa, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict p, inc_t is_p, inc_t ldp \ + ); + +INSERT_GENTDEF( packm_cxk_3mis_ker ) +INSERT_GENTDEF( packm_cxk_4mi_ker ) + + +// packm_rih_ker + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + ctype* restrict kappa, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict p, inc_t ldp \ + ); + +INSERT_GENTDEF( packm_cxk_rih_ker ) + #endif diff --git a/frame/1m/bli_l1m_ker.h b/frame/1m/bli_l1m_ker.h new file mode 100644 index 000000000..794609f44 --- /dev/null +++ b/frame/1m/bli_l1m_ker.h @@ -0,0 +1,141 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + + +// +// Define template prototypes for level-1m kernels. 
+// + +// native packm kernels + +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ + ); + +INSERT_GENTPROT_BASIC( packm_2xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_3xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_4xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_6xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_8xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_10xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_12xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_14xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_16xk_ker_name ) +INSERT_GENTPROT_BASIC( packm_30xk_ker_name ) + + +// 3mis packm kernels + +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ + ); + +INSERT_GENTPROT_BASIC( packm_2xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_4xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_6xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_8xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_10xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_12xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_14xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_16xk_3mis_ker_name ) +INSERT_GENTPROT_BASIC( packm_30xk_3mis_ker_name ) + + +// 4mi packm kernels + +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ + ); + +INSERT_GENTPROT_BASIC( packm_2xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_4xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_6xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_8xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_10xk_4mi_ker_name ) 
+INSERT_GENTPROT_BASIC( packm_12xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_14xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_16xk_4mi_ker_name ) +INSERT_GENTPROT_BASIC( packm_30xk_4mi_ker_name ) + + +// rih packm kernels + +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ + ); + +INSERT_GENTPROT_BASIC( packm_2xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_3xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_4xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_6xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_8xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_10xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_12xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_14xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_16xk_rih_ker_name ) +INSERT_GENTPROT_BASIC( packm_30xk_rih_ker_name ) + diff --git a/frame/1m/packm/bli_packm_cxk.c b/frame/1m/packm/bli_packm_cxk.c index c50b06456..3c2ab6fd0 100644 --- a/frame/1m/packm/bli_packm_cxk.c +++ b/frame/1m/packm/bli_packm_cxk.c @@ -34,19 +34,10 @@ #include "blis.h" -#define FUNCPTR_T packm_cxk_fp - -typedef void (*FUNCPTR_T) - ( - conj_t conja, - dim_t panel_len, - void* kappa, - void* a, inc_t inca, inc_t lda, - void* p, inc_t ldp - ); +#define FUNCPTR_T packm_cxk_ker_vft #undef FUNCPTR_ARRAY_LENGTH -#define FUNCPTR_ARRAY_LENGTH 18 +#define FUNCPTR_ARRAY_LENGTH 32 static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = { @@ -149,6 +140,65 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = { NULL, NULL, NULL, NULL, }, + /* micro-panel width = 18 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 19 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 20 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 21 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 22 */ + { + 
NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 23 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 24 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 25 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 26 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 27 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 28 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 29 */ + { + NULL, NULL, NULL, NULL, + }, + /* micro-panel width = 30 */ + { + BLIS_SPACKM_30XK_KERNEL, + BLIS_CPACKM_30XK_KERNEL, + BLIS_DPACKM_30XK_KERNEL, + BLIS_ZPACKM_30XK_KERNEL, + }, + /* micro-panel width = 31 */ + { + NULL, NULL, NULL, NULL, + }, }; diff --git a/frame/1m/packm/bli_packm_cxk_3mis.c b/frame/1m/packm/bli_packm_cxk_3mis.c index 80c388096..da1ee28ca 100644 --- a/frame/1m/packm/bli_packm_cxk_3mis.c +++ b/frame/1m/packm/bli_packm_cxk_3mis.c @@ -34,16 +34,7 @@ #include "blis.h" -#define FUNCPTR_T packm_cxk_fp - -typedef void (*FUNCPTR_T) - ( - conj_t conja, - dim_t panel_len, - void* kappa, - void* a, inc_t inca, inc_t lda, - void* p, inc_t is_p, inc_t ldp - ); +#define FUNCPTR_T packm_cxk_3mis_ker_vft #undef FUNCPTR_ARRAY_LENGTH #define FUNCPTR_ARRAY_LENGTH 32 diff --git a/frame/1m/packm/bli_packm_cxk_4mi.c b/frame/1m/packm/bli_packm_cxk_4mi.c index c0291d245..2a906fac6 100644 --- a/frame/1m/packm/bli_packm_cxk_4mi.c +++ b/frame/1m/packm/bli_packm_cxk_4mi.c @@ -34,16 +34,7 @@ #include "blis.h" -#define FUNCPTR_T packm_cxk_fp - -typedef void (*FUNCPTR_T) - ( - conj_t conja, - dim_t panel_len, - void* kappa, - void* a, inc_t inca, inc_t lda, - void* p, inc_t is_p, inc_t ldp - ); +#define FUNCPTR_T packm_cxk_4mi_ker_vft #undef FUNCPTR_ARRAY_LENGTH #define FUNCPTR_ARRAY_LENGTH 32 diff --git a/frame/1m/packm/bli_packm_cxk_rih.c b/frame/1m/packm/bli_packm_cxk_rih.c index ec70c08c1..f019eed0b 100644 --- a/frame/1m/packm/bli_packm_cxk_rih.c +++ b/frame/1m/packm/bli_packm_cxk_rih.c @@ -34,17 +34,7 @@ #include 
"blis.h" -#define FUNCPTR_T packm_cxk_fp - -typedef void (*FUNCPTR_T) - ( - conj_t conja, - pack_t schema, - dim_t panel_len, - void* kappa, - void* a, inc_t inca, inc_t lda, - void* p, inc_t ldp - ); +#define FUNCPTR_T packm_cxk_rih_ker_vft #undef FUNCPTR_ARRAY_LENGTH #define FUNCPTR_ARRAY_LENGTH 32 diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.c b/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.c index dd6a4225e..004acaa60 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.c +++ b/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -131,11 +131,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_2xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -231,11 +231,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_4xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -339,11 +339,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_6xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* 
restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -455,11 +455,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_8xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -579,11 +579,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_10xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -711,11 +711,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_12xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -851,11 +851,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_14xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -999,11 +999,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_16xk_3mis_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t 
ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.h b/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.h index 2158bb041..b6811a7b4 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.h +++ b/frame/1m/packm/ukernels/bli_packm_cxk_3mis_ref.h @@ -32,25 +32,30 @@ */ -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ - ); +// Redefine level-1m kernel API names to induce prototypes. -INSERT_GENTPROT_BASIC( packm_2xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_4xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_6xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_8xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_10xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_12xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_14xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_16xk_3mis_ref ) -INSERT_GENTPROT_BASIC( packm_30xk_3mis_ref ) +#undef packm_2xk_3mis_ker_name +#define packm_2xk_3mis_ker_name packm_2xk_3mis_ref +#undef packm_3xk_3mis_ker_name +#define packm_3xk_3mis_ker_name packm_3xk_3mis_ref +#undef packm_4xk_3mis_ker_name +#define packm_4xk_3mis_ker_name packm_4xk_3mis_ref +#undef packm_6xk_3mis_ker_name +#define packm_6xk_3mis_ker_name packm_6xk_3mis_ref +#undef packm_8xk_3mis_ker_name +#define packm_8xk_3mis_ker_name packm_8xk_3mis_ref +#undef packm_10xk_3mis_ker_name +#define packm_10xk_3mis_ker_name packm_10xk_3mis_ref +#undef packm_12xk_3mis_ker_name +#define packm_12xk_3mis_ker_name packm_12xk_3mis_ref +#undef packm_14xk_3mis_ker_name +#define packm_14xk_3mis_ker_name packm_14xk_3mis_ref +#undef packm_16xk_3mis_ker_name +#define packm_16xk_3mis_ker_name packm_16xk_3mis_ref +#undef packm_30xk_3mis_ker_name +#define packm_30xk_3mis_ker_name 
packm_30xk_3mis_ref + +// Include the level-1m kernel API template. + +#include "bli_l1m_ker.h" diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.c b/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.c index 35d2d9662..2279fdeca 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.c +++ b/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -126,11 +126,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_2xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -221,11 +221,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_4xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -324,11 +324,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_6xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -435,11 +435,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_8xk_4mi_ref ) \ 
void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -554,11 +554,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_10xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -681,11 +681,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_12xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -816,11 +816,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_14xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -959,11 +959,11 @@ INSERT_GENTFUNCCO_BASIC0( packm_16xk_4mi_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t is_p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ diff --git 
a/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.h b/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.h index 506da3525..01bb65d7f 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.h +++ b/frame/1m/packm/ukernels/bli_packm_cxk_4mi_ref.h @@ -32,25 +32,30 @@ */ -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t is_p, inc_t ldp \ - ); +// Redefine level-1m kernel API names to induce prototypes. -INSERT_GENTPROT_BASIC( packm_2xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_4xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_6xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_8xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_10xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_12xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_14xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_16xk_4mi_ref ) -INSERT_GENTPROT_BASIC( packm_30xk_4mi_ref ) +#undef packm_2xk_4mi_ker_name +#define packm_2xk_4mi_ker_name packm_2xk_4mi_ref +#undef packm_3xk_4mi_ker_name +#define packm_3xk_4mi_ker_name packm_3xk_4mi_ref +#undef packm_4xk_4mi_ker_name +#define packm_4xk_4mi_ker_name packm_4xk_4mi_ref +#undef packm_6xk_4mi_ker_name +#define packm_6xk_4mi_ker_name packm_6xk_4mi_ref +#undef packm_8xk_4mi_ker_name +#define packm_8xk_4mi_ker_name packm_8xk_4mi_ref +#undef packm_10xk_4mi_ker_name +#define packm_10xk_4mi_ker_name packm_10xk_4mi_ref +#undef packm_12xk_4mi_ker_name +#define packm_12xk_4mi_ker_name packm_12xk_4mi_ref +#undef packm_14xk_4mi_ker_name +#define packm_14xk_4mi_ker_name packm_14xk_4mi_ref +#undef packm_16xk_4mi_ker_name +#define packm_16xk_4mi_ker_name packm_16xk_4mi_ref +#undef packm_30xk_4mi_ker_name +#define packm_30xk_4mi_ker_name packm_30xk_4mi_ref + +// Include the level-1m kernel API template. 
+ +#include "bli_l1m_ker.h" diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_ref.c b/frame/1m/packm/ukernels/bli_packm_cxk_ref.c index 15abe2878..b33df08cf 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_ref.c +++ b/frame/1m/packm/ukernels/bli_packm_cxk_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -132,11 +132,11 @@ INSERT_GENTFUNC_BASIC0( packm_2xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -233,11 +233,11 @@ INSERT_GENTFUNC_BASIC0( packm_3xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -332,11 +332,11 @@ INSERT_GENTFUNC_BASIC0( packm_4xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -420,11 +420,11 @@ INSERT_GENTFUNC_BASIC0( packm_6xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* 
restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -543,11 +543,11 @@ INSERT_GENTFUNC_BASIC0( packm_8xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -647,11 +647,11 @@ INSERT_GENTFUNC_BASIC0( packm_10xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -759,11 +759,11 @@ INSERT_GENTFUNC_BASIC0( packm_12xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -879,11 +879,11 @@ INSERT_GENTFUNC_BASIC0( packm_14xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ @@ -1007,11 +1007,11 @@ INSERT_GENTFUNC_BASIC0( packm_16xk_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* 
restrict p, inc_t ldp \ ) \ { \ ctype* restrict kappa_cast = kappa; \ diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_ref.h b/frame/1m/packm/ukernels/bli_packm_cxk_ref.h index 3083c2e08..9a55e20ea 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_ref.h +++ b/frame/1m/packm/ukernels/bli_packm_cxk_ref.h @@ -32,26 +32,30 @@ */ -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conja, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ - ); +// Redefine level-1m kernel API names to induce prototypes. -INSERT_GENTPROT_BASIC( packm_2xk_ref ) -INSERT_GENTPROT_BASIC( packm_3xk_ref ) -INSERT_GENTPROT_BASIC( packm_4xk_ref ) -INSERT_GENTPROT_BASIC( packm_6xk_ref ) -INSERT_GENTPROT_BASIC( packm_8xk_ref ) -INSERT_GENTPROT_BASIC( packm_10xk_ref ) -INSERT_GENTPROT_BASIC( packm_12xk_ref ) -INSERT_GENTPROT_BASIC( packm_14xk_ref ) -INSERT_GENTPROT_BASIC( packm_16xk_ref ) -INSERT_GENTPROT_BASIC( packm_30xk_ref ) +#undef packm_2xk_ker_name +#define packm_2xk_ker_name packm_2xk_ref +#undef packm_3xk_ker_name +#define packm_3xk_ker_name packm_3xk_ref +#undef packm_4xk_ker_name +#define packm_4xk_ker_name packm_4xk_ref +#undef packm_6xk_ker_name +#define packm_6xk_ker_name packm_6xk_ref +#undef packm_8xk_ker_name +#define packm_8xk_ker_name packm_8xk_ref +#undef packm_10xk_ker_name +#define packm_10xk_ker_name packm_10xk_ref +#undef packm_12xk_ker_name +#define packm_12xk_ker_name packm_12xk_ref +#undef packm_14xk_ker_name +#define packm_14xk_ker_name packm_14xk_ref +#undef packm_16xk_ker_name +#define packm_16xk_ker_name packm_16xk_ref +#undef packm_30xk_ker_name +#define packm_30xk_ker_name packm_30xk_ref + +// Include the level-1m kernel API template. 
+ +#include "bli_l1m_ker.h" diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.c b/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.c index e0bdeb250..3f3634aee 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.c +++ b/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.c @@ -39,12 +39,12 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -219,12 +219,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_2xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -421,12 +421,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_4xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -645,12 +645,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_6xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -891,12 +891,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_8xk_rih_ref ) \ void 
PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -1159,12 +1159,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_10xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -1449,12 +1449,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_12xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -1761,12 +1761,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_14xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ @@ -2095,12 +2095,12 @@ INSERT_GENTFUNCCO_BASIC0( packm_16xk_rih_ref ) \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ + conj_t conja, \ + pack_t schema, \ + dim_t n, \ + void* restrict kappa, \ + void* restrict a, inc_t inca, inc_t lda, \ + void* restrict p, inc_t ldp 
\ ) \ { \ const inc_t inca2 = 2 * inca; \ diff --git a/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.h b/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.h index 70d037e0a..5a465a316 100644 --- a/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.h +++ b/frame/1m/packm/ukernels/bli_packm_cxk_rih_ref.h @@ -32,26 +32,30 @@ */ -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conja, \ - pack_t schema, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ - ); +// Redefine level-1m kernel API names to induce prototypes. -INSERT_GENTPROT_BASIC( packm_2xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_4xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_6xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_8xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_10xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_12xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_14xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_16xk_rih_ref ) -INSERT_GENTPROT_BASIC( packm_30xk_rih_ref ) +#undef packm_2xk_rih_ker_name +#define packm_2xk_rih_ker_name packm_2xk_rih_ref +#undef packm_3xk_rih_ker_name +#define packm_3xk_rih_ker_name packm_3xk_rih_ref +#undef packm_4xk_rih_ker_name +#define packm_4xk_rih_ker_name packm_4xk_rih_ref +#undef packm_6xk_rih_ker_name +#define packm_6xk_rih_ker_name packm_6xk_rih_ref +#undef packm_8xk_rih_ker_name +#define packm_8xk_rih_ker_name packm_8xk_rih_ref +#undef packm_10xk_rih_ker_name +#define packm_10xk_rih_ker_name packm_10xk_rih_ref +#undef packm_12xk_rih_ker_name +#define packm_12xk_rih_ker_name packm_12xk_rih_ref +#undef packm_14xk_rih_ker_name +#define packm_14xk_rih_ker_name packm_14xk_rih_ref +#undef packm_16xk_rih_ker_name +#define packm_16xk_rih_ker_name packm_16xk_rih_ref +#undef packm_30xk_rih_ker_name +#define packm_30xk_rih_ker_name packm_30xk_rih_ref + +// Include the level-1m kernel API template. 
+ +#include "bli_l1m_ker.h" diff --git a/frame/include/bli_kernel_prototypes.h b/frame/include/bli_kernel_prototypes.h index d3524358c..e693825ff 100644 --- a/frame/include/bli_kernel_prototypes.h +++ b/frame/include/bli_kernel_prototypes.h @@ -68,6 +68,62 @@ #include "bli_l3_ukr.h" +// +// Level-1m +// + +#define bli_spackm_2xk_ker_name BLIS_SPACKM_2XK_KERNEL +#define bli_dpackm_2xk_ker_name BLIS_DPACKM_2XK_KERNEL +#define bli_cpackm_2xk_ker_name BLIS_CPACKM_2XK_KERNEL +#define bli_zpackm_2xk_ker_name BLIS_ZPACKM_2XK_KERNEL + +#define bli_spackm_3xk_ker_name BLIS_SPACKM_3XK_KERNEL +#define bli_dpackm_3xk_ker_name BLIS_DPACKM_3XK_KERNEL +#define bli_cpackm_3xk_ker_name BLIS_CPACKM_3XK_KERNEL +#define bli_zpackm_3xk_ker_name BLIS_ZPACKM_3XK_KERNEL + +#define bli_spackm_4xk_ker_name BLIS_SPACKM_4XK_KERNEL +#define bli_dpackm_4xk_ker_name BLIS_DPACKM_4XK_KERNEL +#define bli_cpackm_4xk_ker_name BLIS_CPACKM_4XK_KERNEL +#define bli_zpackm_4xk_ker_name BLIS_ZPACKM_4XK_KERNEL + +#define bli_spackm_6xk_ker_name BLIS_SPACKM_6XK_KERNEL +#define bli_dpackm_6xk_ker_name BLIS_DPACKM_6XK_KERNEL +#define bli_cpackm_6xk_ker_name BLIS_CPACKM_6XK_KERNEL +#define bli_zpackm_6xk_ker_name BLIS_ZPACKM_6XK_KERNEL + +#define bli_spackm_8xk_ker_name BLIS_SPACKM_8XK_KERNEL +#define bli_dpackm_8xk_ker_name BLIS_DPACKM_8XK_KERNEL +#define bli_cpackm_8xk_ker_name BLIS_CPACKM_8XK_KERNEL +#define bli_zpackm_8xk_ker_name BLIS_ZPACKM_8XK_KERNEL + +#define bli_spackm_10xk_ker_name BLIS_SPACKM_10XK_KERNEL +#define bli_dpackm_10xk_ker_name BLIS_DPACKM_10XK_KERNEL +#define bli_cpackm_10xk_ker_name BLIS_CPACKM_10XK_KERNEL +#define bli_zpackm_10xk_ker_name BLIS_ZPACKM_10XK_KERNEL + +#define bli_spackm_12xk_ker_name BLIS_SPACKM_12XK_KERNEL +#define bli_dpackm_12xk_ker_name BLIS_DPACKM_12XK_KERNEL +#define bli_cpackm_12xk_ker_name BLIS_CPACKM_12XK_KERNEL +#define bli_zpackm_12xk_ker_name BLIS_ZPACKM_12XK_KERNEL + +#define bli_spackm_14xk_ker_name BLIS_SPACKM_14XK_KERNEL +#define bli_dpackm_14xk_ker_name 
BLIS_DPACKM_14XK_KERNEL +#define bli_cpackm_14xk_ker_name BLIS_CPACKM_14XK_KERNEL +#define bli_zpackm_14xk_ker_name BLIS_ZPACKM_14XK_KERNEL + +#define bli_spackm_16xk_ker_name BLIS_SPACKM_16XK_KERNEL +#define bli_dpackm_16xk_ker_name BLIS_DPACKM_16XK_KERNEL +#define bli_cpackm_16xk_ker_name BLIS_CPACKM_16XK_KERNEL +#define bli_zpackm_16xk_ker_name BLIS_ZPACKM_16XK_KERNEL + +#define bli_spackm_30xk_ker_name BLIS_SPACKM_30XK_KERNEL +#define bli_dpackm_30xk_ker_name BLIS_DPACKM_30XK_KERNEL +#define bli_cpackm_30xk_ker_name BLIS_CPACKM_30XK_KERNEL +#define bli_zpackm_30xk_ker_name BLIS_ZPACKM_30XK_KERNEL + +#include "bli_l1m_ker.h" + // // Level-1f //