mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Renamed _ri, _ri3 packm ukernels to _4m, _3m.
Details:
- Renamed packm ukernels, the _cxk dispatcher, and the structure-aware _cxk helper functions to use the _4m and _3m suffixes instead of _ri and _ri3, respectively.
- Updated the names of the cpp macros that correspond to the packm ukernels.
This commit is contained in:
@@ -50,6 +50,6 @@
|
||||
#include "bli_packm_tri_cxk.h"
|
||||
|
||||
#include "bli_packm_cxk.h"
|
||||
#include "bli_packm_cxk_ri.h"
|
||||
#include "bli_packm_cxk_ri3.h"
|
||||
#include "bli_packm_cxk_4m.h"
|
||||
#include "bli_packm_cxk_3m.h"
|
||||
|
||||
|
||||
@@ -371,19 +371,19 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
if( packm_thread_my_iter( it, thread ) ) \
|
||||
{ \
|
||||
PASTEMAC(ch,packm_tri_cxk_ri3)( strucc, \
|
||||
diagoffp_i, \
|
||||
diagc, \
|
||||
uploc, \
|
||||
conjc, \
|
||||
invdiag, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
*m_panel_max, \
|
||||
*n_panel_max, \
|
||||
kappa_cast, \
|
||||
c_use, rs_c, cs_c, \
|
||||
p_use, rs_p, cs_p ); \
|
||||
PASTEMAC(ch,packm_tri_cxk_3m)( strucc, \
|
||||
diagoffp_i, \
|
||||
diagc, \
|
||||
uploc, \
|
||||
conjc, \
|
||||
invdiag, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
*m_panel_max, \
|
||||
*n_panel_max, \
|
||||
kappa_cast, \
|
||||
c_use, rs_c, cs_c, \
|
||||
p_use, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
\
|
||||
@@ -411,17 +411,17 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
if( packm_thread_my_iter( it, thread ) ) \
|
||||
{ \
|
||||
PASTEMAC(ch,packm_herm_cxk_ri3)( strucc, \
|
||||
diagoffc_i, \
|
||||
uploc, \
|
||||
conjc, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
*m_panel_max, \
|
||||
*n_panel_max, \
|
||||
kappa_cast, \
|
||||
c_begin, rs_c, cs_c, \
|
||||
p_begin, rs_p, cs_p ); \
|
||||
PASTEMAC(ch,packm_herm_cxk_3m)( strucc, \
|
||||
diagoffc_i, \
|
||||
uploc, \
|
||||
conjc, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
*m_panel_max, \
|
||||
*n_panel_max, \
|
||||
kappa_cast, \
|
||||
c_begin, rs_c, cs_c, \
|
||||
p_begin, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
/* NOTE: This value is equivalent to (ps_p*3)/2. */ \
|
||||
@@ -438,17 +438,17 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
if( packm_thread_my_iter( it, thread ) ) \
|
||||
{ \
|
||||
PASTEMAC(ch,packm_gen_cxk_ri3)( BLIS_GENERAL, \
|
||||
0, \
|
||||
BLIS_DENSE, \
|
||||
conjc, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
*m_panel_max, \
|
||||
*n_panel_max, \
|
||||
kappa_cast, \
|
||||
c_begin, rs_c, cs_c, \
|
||||
p_begin, rs_p, cs_p ); \
|
||||
PASTEMAC(ch,packm_gen_cxk_3m)( BLIS_GENERAL, \
|
||||
0, \
|
||||
BLIS_DENSE, \
|
||||
conjc, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
*m_panel_max, \
|
||||
*n_panel_max, \
|
||||
kappa_cast, \
|
||||
c_begin, rs_c, cs_c, \
|
||||
p_begin, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
/* NOTE: This value is equivalent to (ps_p*3)/2. */ \
|
||||
|
||||
@@ -371,7 +371,7 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
if( packm_thread_my_iter( it, thread ) ) \
|
||||
{ \
|
||||
PASTEMAC(ch,packm_tri_cxk_ri)( strucc, \
|
||||
PASTEMAC(ch,packm_tri_cxk_4m)( strucc, \
|
||||
diagoffp_i, \
|
||||
diagc, \
|
||||
uploc, \
|
||||
@@ -418,7 +418,7 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
if( packm_thread_my_iter( it, thread ) ) \
|
||||
{ \
|
||||
PASTEMAC(ch,packm_herm_cxk_ri)( strucc, \
|
||||
PASTEMAC(ch,packm_herm_cxk_4m)( strucc, \
|
||||
diagoffc_i, \
|
||||
uploc, \
|
||||
conjc, \
|
||||
@@ -445,7 +445,7 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
if( packm_thread_my_iter( it, thread ) ) \
|
||||
{ \
|
||||
PASTEMAC(ch,packm_gen_cxk_ri)( BLIS_GENERAL, \
|
||||
PASTEMAC(ch,packm_gen_cxk_4m)( BLIS_GENERAL, \
|
||||
0, \
|
||||
BLIS_DENSE, \
|
||||
conjc, \
|
||||
|
||||
@@ -60,9 +60,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 2 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_2XK_RI3_KERNEL,
|
||||
BLIS_CPACKM_2XK_3M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_2XK_RI3_KERNEL,
|
||||
BLIS_ZPACKM_2XK_3M_KERNEL,
|
||||
},
|
||||
/* panel width = 3 */
|
||||
{
|
||||
@@ -71,9 +71,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 4 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_4XK_RI3_KERNEL,
|
||||
BLIS_CPACKM_4XK_3M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_4XK_RI3_KERNEL,
|
||||
BLIS_ZPACKM_4XK_3M_KERNEL,
|
||||
},
|
||||
/* panel width = 5 */
|
||||
{
|
||||
@@ -82,9 +82,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 6 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_6XK_RI3_KERNEL,
|
||||
BLIS_CPACKM_6XK_3M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_6XK_RI3_KERNEL,
|
||||
BLIS_ZPACKM_6XK_3M_KERNEL,
|
||||
},
|
||||
/* panel width = 7 */
|
||||
{
|
||||
@@ -93,9 +93,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 8 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_8XK_RI3_KERNEL,
|
||||
BLIS_CPACKM_8XK_3M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_8XK_RI3_KERNEL,
|
||||
BLIS_ZPACKM_8XK_3M_KERNEL,
|
||||
},
|
||||
/* panel width = 9 */
|
||||
{
|
||||
@@ -104,9 +104,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 10 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_10XK_RI3_KERNEL,
|
||||
BLIS_CPACKM_10XK_3M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_10XK_RI3_KERNEL,
|
||||
BLIS_ZPACKM_10XK_3M_KERNEL,
|
||||
},
|
||||
/* panel width = 11 */
|
||||
{
|
||||
@@ -115,9 +115,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 12 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_12XK_RI3_KERNEL,
|
||||
BLIS_CPACKM_12XK_3M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_12XK_RI3_KERNEL,
|
||||
BLIS_ZPACKM_12XK_3M_KERNEL,
|
||||
},
|
||||
/* panel width = 13 */
|
||||
{
|
||||
@@ -126,9 +126,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 14 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_14XK_RI3_KERNEL,
|
||||
BLIS_CPACKM_14XK_3M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_14XK_RI3_KERNEL,
|
||||
BLIS_ZPACKM_14XK_3M_KERNEL,
|
||||
},
|
||||
/* panel width = 15 */
|
||||
{
|
||||
@@ -137,9 +137,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 16 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_16XK_RI3_KERNEL,
|
||||
BLIS_CPACKM_16XK_3M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_16XK_RI3_KERNEL,
|
||||
BLIS_ZPACKM_16XK_3M_KERNEL,
|
||||
},
|
||||
/* panel width = 17 */
|
||||
{
|
||||
@@ -197,7 +197,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype_r* restrict a_i = ( ctype_r* )a + 1; \
|
||||
ctype_r* restrict p_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict p_i = ( ctype_r* )p + psp; \
|
||||
ctype_r* restrict p_ri = ( ctype_r* )p + 2*psp; \
|
||||
ctype_r* restrict p_rpi = ( ctype_r* )p + 2*psp; \
|
||||
dim_t inca2 = 2*inca; \
|
||||
dim_t lda2 = 2*lda; \
|
||||
\
|
||||
@@ -212,11 +212,11 @@ void PASTEMAC(ch,varname)( \
|
||||
{ \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
{ \
|
||||
ctype_r* restrict alpha11_r = a_r + (i )*inca2 + (j )*lda2; \
|
||||
ctype_r* restrict alpha11_i = a_i + (i )*inca2 + (j )*lda2; \
|
||||
ctype_r* restrict pi11_r = p_r + (i )*1 + (j )*ldp; \
|
||||
ctype_r* restrict pi11_i = p_i + (i )*1 + (j )*ldp; \
|
||||
ctype_r* restrict pi11_ri = p_ri + (i )*1 + (j )*ldp; \
|
||||
ctype_r* restrict alpha11_r = a_r + (i )*inca2 + (j )*lda2; \
|
||||
ctype_r* restrict alpha11_i = a_i + (i )*inca2 + (j )*lda2; \
|
||||
ctype_r* restrict pi11_r = p_r + (i )*1 + (j )*ldp; \
|
||||
ctype_r* restrict pi11_i = p_i + (i )*1 + (j )*ldp; \
|
||||
ctype_r* restrict pi11_rpi = p_rpi + (i )*1 + (j )*ldp; \
|
||||
\
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, \
|
||||
*kappa_i, \
|
||||
@@ -224,7 +224,7 @@ void PASTEMAC(ch,varname)( \
|
||||
*alpha11_i, \
|
||||
*pi11_r, \
|
||||
*pi11_i, \
|
||||
*pi11_ri ); \
|
||||
*pi11_rpi ); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
@@ -234,11 +234,11 @@ void PASTEMAC(ch,varname)( \
|
||||
{ \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
{ \
|
||||
ctype_r* restrict alpha11_r = a_r + (i )*inca2 + (j )*lda2; \
|
||||
ctype_r* restrict alpha11_i = a_i + (i )*inca2 + (j )*lda2; \
|
||||
ctype_r* restrict pi11_r = p_r + (i )*1 + (j )*ldp; \
|
||||
ctype_r* restrict pi11_i = p_i + (i )*1 + (j )*ldp; \
|
||||
ctype_r* restrict pi11_ri = p_ri + (i )*1 + (j )*ldp; \
|
||||
ctype_r* restrict alpha11_r = a_r + (i )*inca2 + (j )*lda2; \
|
||||
ctype_r* restrict alpha11_i = a_i + (i )*inca2 + (j )*lda2; \
|
||||
ctype_r* restrict pi11_r = p_r + (i )*1 + (j )*ldp; \
|
||||
ctype_r* restrict pi11_i = p_i + (i )*1 + (j )*ldp; \
|
||||
ctype_r* restrict pi11_rpi = p_rpi + (i )*1 + (j )*ldp; \
|
||||
\
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, \
|
||||
*kappa_i, \
|
||||
@@ -246,12 +246,12 @@ void PASTEMAC(ch,varname)( \
|
||||
*alpha11_i, \
|
||||
*pi11_r, \
|
||||
*pi11_i, \
|
||||
*pi11_ri ); \
|
||||
*pi11_rpi ); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_cxk_ri3 )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_cxk_3m )
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
*/
|
||||
|
||||
#include "bli_packm_ref_cxk_ri3.h"
|
||||
#include "bli_packm_ref_cxk_3m.h"
|
||||
|
||||
|
||||
#undef GENTPROTCO
|
||||
@@ -47,5 +47,5 @@ void PASTEMAC(ch,varname)( \
|
||||
void* p, inc_t psp, inc_t ldp \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( packm_cxk_ri3 )
|
||||
INSERT_GENTPROTCO_BASIC( packm_cxk_3m )
|
||||
|
||||
@@ -60,9 +60,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 2 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_2XK_RI_KERNEL,
|
||||
BLIS_CPACKM_2XK_4M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_2XK_RI_KERNEL,
|
||||
BLIS_ZPACKM_2XK_4M_KERNEL,
|
||||
},
|
||||
/* panel width = 3 */
|
||||
{
|
||||
@@ -71,9 +71,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 4 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_4XK_RI_KERNEL,
|
||||
BLIS_CPACKM_4XK_4M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_4XK_RI_KERNEL,
|
||||
BLIS_ZPACKM_4XK_4M_KERNEL,
|
||||
},
|
||||
/* panel width = 5 */
|
||||
{
|
||||
@@ -82,9 +82,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 6 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_6XK_RI_KERNEL,
|
||||
BLIS_CPACKM_6XK_4M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_6XK_RI_KERNEL,
|
||||
BLIS_ZPACKM_6XK_4M_KERNEL,
|
||||
},
|
||||
/* panel width = 7 */
|
||||
{
|
||||
@@ -93,9 +93,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 8 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_8XK_RI_KERNEL,
|
||||
BLIS_CPACKM_8XK_4M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_8XK_RI_KERNEL,
|
||||
BLIS_ZPACKM_8XK_4M_KERNEL,
|
||||
},
|
||||
/* panel width = 9 */
|
||||
{
|
||||
@@ -104,9 +104,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 10 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_10XK_RI_KERNEL,
|
||||
BLIS_CPACKM_10XK_4M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_10XK_RI_KERNEL,
|
||||
BLIS_ZPACKM_10XK_4M_KERNEL,
|
||||
},
|
||||
/* panel width = 11 */
|
||||
{
|
||||
@@ -115,9 +115,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 12 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_12XK_RI_KERNEL,
|
||||
BLIS_CPACKM_12XK_4M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_12XK_RI_KERNEL,
|
||||
BLIS_ZPACKM_12XK_4M_KERNEL,
|
||||
},
|
||||
/* panel width = 13 */
|
||||
{
|
||||
@@ -126,9 +126,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 14 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_14XK_RI_KERNEL,
|
||||
BLIS_CPACKM_14XK_4M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_14XK_RI_KERNEL,
|
||||
BLIS_ZPACKM_14XK_4M_KERNEL,
|
||||
},
|
||||
/* panel width = 15 */
|
||||
{
|
||||
@@ -137,9 +137,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
/* panel width = 16 */
|
||||
{
|
||||
NULL,
|
||||
BLIS_CPACKM_16XK_RI_KERNEL,
|
||||
BLIS_CPACKM_16XK_4M_KERNEL,
|
||||
NULL,
|
||||
BLIS_ZPACKM_16XK_RI_KERNEL,
|
||||
BLIS_ZPACKM_16XK_4M_KERNEL,
|
||||
},
|
||||
/* panel width = 17 */
|
||||
{
|
||||
@@ -249,5 +249,5 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_cxk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_cxk_4m )
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
*/
|
||||
|
||||
#include "bli_packm_ref_cxk_ri.h"
|
||||
#include "bli_packm_ref_cxk_4m.h"
|
||||
|
||||
|
||||
#undef GENTPROTCO
|
||||
@@ -47,5 +47,5 @@ void PASTEMAC(ch,varname)( \
|
||||
void* p, inc_t psp, inc_t ldp \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( packm_cxk_ri )
|
||||
INSERT_GENTPROTCO_BASIC( packm_cxk_4m )
|
||||
|
||||
@@ -192,7 +192,7 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
\
|
||||
/* Pack the panel. */ \
|
||||
PASTEMAC(ch,packm_cxk_ri)( conjc, \
|
||||
PASTEMAC(ch,packm_cxk_4m)( conjc, \
|
||||
panel_dim, \
|
||||
panel_len, \
|
||||
kappa, \
|
||||
@@ -256,7 +256,7 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_gen_cxk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_gen_cxk_4m )
|
||||
|
||||
|
||||
|
||||
@@ -317,12 +317,12 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
\
|
||||
/* Pack the panel. */ \
|
||||
PASTEMAC(ch,packm_cxk_ri3)( conjc, \
|
||||
panel_dim, \
|
||||
panel_len, \
|
||||
kappa, \
|
||||
c, incc, ldc, \
|
||||
p, psp, ldp ); \
|
||||
PASTEMAC(ch,packm_cxk_3m)( conjc, \
|
||||
panel_dim, \
|
||||
panel_len, \
|
||||
kappa, \
|
||||
c, incc, ldc, \
|
||||
p, psp, ldp ); \
|
||||
\
|
||||
\
|
||||
/* The packed memory region was acquired/allocated with "aligned"
|
||||
@@ -334,12 +334,12 @@ void PASTEMAC(ch,varname)( \
|
||||
different register blockings for the edge cases. */ \
|
||||
if ( m_panel != m_panel_max ) \
|
||||
{ \
|
||||
dim_t i = m_panel; \
|
||||
dim_t m_edge = m_panel_max - i; \
|
||||
dim_t n_edge = n_panel_max; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (i )*rs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (i )*rs_p; \
|
||||
ctype_r* p_edge_ri = ( ctype_r* )p + 2*psp + (i )*rs_p; \
|
||||
dim_t i = m_panel; \
|
||||
dim_t m_edge = m_panel_max - i; \
|
||||
dim_t n_edge = n_panel_max; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (i )*rs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (i )*rs_p; \
|
||||
ctype_r* p_edge_rpi = ( ctype_r* )p + 2*psp + (i )*rs_p; \
|
||||
\
|
||||
PASTEMAC(chr,setm)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
@@ -361,17 +361,17 @@ void PASTEMAC(ch,varname)( \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero_r, \
|
||||
p_edge_ri, rs_p, cs_p ); \
|
||||
p_edge_rpi, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
if ( n_panel != n_panel_max ) \
|
||||
{ \
|
||||
dim_t j = n_panel; \
|
||||
dim_t m_edge = m_panel_max; \
|
||||
dim_t n_edge = n_panel_max - j; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (j )*cs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (j )*cs_p; \
|
||||
ctype_r* p_edge_ri = ( ctype_r* )p + 2*psp + (j )*cs_p; \
|
||||
dim_t j = n_panel; \
|
||||
dim_t m_edge = m_panel_max; \
|
||||
dim_t n_edge = n_panel_max - j; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (j )*cs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (j )*cs_p; \
|
||||
ctype_r* p_edge_rpi = ( ctype_r* )p + 2*psp + (j )*cs_p; \
|
||||
\
|
||||
PASTEMAC(chr,setm)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
@@ -393,9 +393,9 @@ void PASTEMAC(ch,varname)( \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero_r, \
|
||||
p_edge_ri, rs_p, cs_p ); \
|
||||
p_edge_rpi, rs_p, cs_p ); \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_gen_cxk_ri3 )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_gen_cxk_3m )
|
||||
|
||||
|
||||
@@ -70,6 +70,6 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict p, inc_t rs_p, inc_t cs_p \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( packm_gen_cxk_ri )
|
||||
INSERT_GENTPROTCO_BASIC( packm_gen_cxk_4m )
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( packm_gen_cxk_ri3 )
|
||||
INSERT_GENTPROTCO_BASIC( packm_gen_cxk_3m )
|
||||
|
||||
@@ -396,7 +396,7 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
\
|
||||
/* Pack the full panel. */ \
|
||||
PASTEMAC(ch,packm_cxk_ri)( conjc, \
|
||||
PASTEMAC(ch,packm_cxk_4m)( conjc, \
|
||||
panel_dim, \
|
||||
panel_len, \
|
||||
kappa, \
|
||||
@@ -470,7 +470,7 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
/* Pack to p10. For upper storage, this includes the unstored
|
||||
triangle of c11. */ \
|
||||
PASTEMAC(ch,packm_cxk_ri)( conjc10, \
|
||||
PASTEMAC(ch,packm_cxk_4m)( conjc10, \
|
||||
p10_dim, \
|
||||
p10_len, \
|
||||
kappa, \
|
||||
@@ -479,7 +479,7 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
/* Pack to p12. For lower storage, this includes the unstored
|
||||
triangle of c11. */ \
|
||||
PASTEMAC(ch,packm_cxk_ri)( conjc12, \
|
||||
PASTEMAC(ch,packm_cxk_4m)( conjc12, \
|
||||
p12_dim, \
|
||||
p12_len, \
|
||||
kappa, \
|
||||
@@ -627,7 +627,7 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_herm_cxk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_herm_cxk_4m )
|
||||
|
||||
|
||||
|
||||
@@ -732,12 +732,12 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
\
|
||||
/* Pack the full panel. */ \
|
||||
PASTEMAC(ch,packm_cxk_ri3)( conjc, \
|
||||
panel_dim, \
|
||||
panel_len, \
|
||||
kappa, \
|
||||
c, incc, ldc, \
|
||||
p, psp, ldp ); \
|
||||
PASTEMAC(ch,packm_cxk_3m)( conjc, \
|
||||
panel_dim, \
|
||||
panel_len, \
|
||||
kappa, \
|
||||
c, incc, ldc, \
|
||||
p, psp, ldp ); \
|
||||
} \
|
||||
else /* if ( bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) */ \
|
||||
{ \
|
||||
@@ -806,21 +806,21 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
/* Pack to p10. For upper storage, this includes the unstored
|
||||
triangle of c11. */ \
|
||||
PASTEMAC(ch,packm_cxk_ri3)( conjc10, \
|
||||
p10_dim, \
|
||||
p10_len, \
|
||||
kappa, \
|
||||
c10, incc10, ldc10, \
|
||||
p10, psp, ldp ); \
|
||||
PASTEMAC(ch,packm_cxk_3m)( conjc10, \
|
||||
p10_dim, \
|
||||
p10_len, \
|
||||
kappa, \
|
||||
c10, incc10, ldc10, \
|
||||
p10, psp, ldp ); \
|
||||
\
|
||||
/* Pack to p12. For lower storage, this includes the unstored
|
||||
triangle of c11. */ \
|
||||
PASTEMAC(ch,packm_cxk_ri3)( conjc12, \
|
||||
p12_dim, \
|
||||
p12_len, \
|
||||
kappa, \
|
||||
c12, incc12, ldc12, \
|
||||
p12, psp, ldp ); \
|
||||
PASTEMAC(ch,packm_cxk_3m)( conjc12, \
|
||||
p12_dim, \
|
||||
p12_len, \
|
||||
kappa, \
|
||||
c12, incc12, ldc12, \
|
||||
p12, psp, ldp ); \
|
||||
\
|
||||
/* Pack the stored triangle of c11 to p11. */ \
|
||||
{ \
|
||||
@@ -902,18 +902,18 @@ void PASTEMAC(ch,varname)( \
|
||||
/* Update the p11 section of the rpi panel. It simply needs
|
||||
to contain the sum of p11_r + p11_i. */ \
|
||||
{ \
|
||||
ctype_r* p11_ri = p11_i + psp; \
|
||||
ctype_r* p11_rpi = p11_i + psp; \
|
||||
\
|
||||
for ( j = 0; j < p11_n; ++j ) \
|
||||
for ( i = 0; i < p11_m; ++i ) \
|
||||
{ \
|
||||
ctype_r* pi11_r = p11_r + (i )*rs_p11 + (j )*cs_p11; \
|
||||
ctype_r* pi11_i = p11_i + (i )*rs_p11 + (j )*cs_p11; \
|
||||
ctype_r* pi11_ri = p11_ri + (i )*rs_p11 + (j )*cs_p11; \
|
||||
ctype_r* pi11_r = p11_r + (i )*rs_p11 + (j )*cs_p11; \
|
||||
ctype_r* pi11_i = p11_i + (i )*rs_p11 + (j )*cs_p11; \
|
||||
ctype_r* pi11_rpi = p11_rpi + (i )*rs_p11 + (j )*cs_p11; \
|
||||
\
|
||||
PASTEMAC(chr,add3s)( *pi11_r, \
|
||||
*pi11_i, \
|
||||
*pi11_ri ); \
|
||||
*pi11_rpi ); \
|
||||
} \
|
||||
} \
|
||||
/*
|
||||
@@ -934,12 +934,12 @@ void PASTEMAC(ch,varname)( \
|
||||
different register blockings for the edge cases. */ \
|
||||
if ( m_panel != m_panel_max ) \
|
||||
{ \
|
||||
dim_t i = m_panel; \
|
||||
dim_t m_edge = m_panel_max - i; \
|
||||
dim_t n_edge = n_panel_max; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (i )*rs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (i )*rs_p; \
|
||||
ctype_r* p_edge_ri = ( ctype_r* )p + 2*psp + (i )*rs_p; \
|
||||
dim_t i = m_panel; \
|
||||
dim_t m_edge = m_panel_max - i; \
|
||||
dim_t n_edge = n_panel_max; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (i )*rs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (i )*rs_p; \
|
||||
ctype_r* p_edge_rpi = ( ctype_r* )p + 2*psp + (i )*rs_p; \
|
||||
\
|
||||
PASTEMAC(chr,setm)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
@@ -961,17 +961,17 @@ void PASTEMAC(ch,varname)( \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero_r, \
|
||||
p_edge_ri, rs_p, cs_p ); \
|
||||
p_edge_rpi, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
if ( n_panel != n_panel_max ) \
|
||||
{ \
|
||||
dim_t j = n_panel; \
|
||||
dim_t m_edge = m_panel_max; \
|
||||
dim_t n_edge = n_panel_max - j; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (j )*cs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (j )*cs_p; \
|
||||
ctype_r* p_edge_ri = ( ctype_r* )p + 2*psp + (j )*cs_p; \
|
||||
dim_t j = n_panel; \
|
||||
dim_t m_edge = m_panel_max; \
|
||||
dim_t n_edge = n_panel_max - j; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (j )*cs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (j )*cs_p; \
|
||||
ctype_r* p_edge_rpi = ( ctype_r* )p + 2*psp + (j )*cs_p; \
|
||||
\
|
||||
PASTEMAC(chr,setm)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
@@ -993,9 +993,9 @@ void PASTEMAC(ch,varname)( \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero_r, \
|
||||
p_edge_ri, rs_p, cs_p ); \
|
||||
p_edge_rpi, rs_p, cs_p ); \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_herm_cxk_ri3 )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_herm_cxk_3m )
|
||||
|
||||
|
||||
@@ -70,6 +70,6 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict p, inc_t rs_p, inc_t cs_p \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( packm_herm_cxk_ri )
|
||||
INSERT_GENTPROTCO_BASIC( packm_herm_cxk_4m )
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( packm_herm_cxk_ri3 )
|
||||
INSERT_GENTPROTCO_BASIC( packm_herm_cxk_3m )
|
||||
|
||||
@@ -284,7 +284,7 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
\
|
||||
/* Pack the panel. */ \
|
||||
PASTEMAC(ch,packm_cxk_ri)( conjc, \
|
||||
PASTEMAC(ch,packm_cxk_4m)( conjc, \
|
||||
panel_dim, \
|
||||
panel_len, \
|
||||
kappa, \
|
||||
@@ -449,7 +449,7 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_tri_cxk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_tri_cxk_4m )
|
||||
|
||||
|
||||
|
||||
@@ -520,20 +520,20 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
\
|
||||
/* Pack the panel. */ \
|
||||
PASTEMAC(ch,packm_cxk_ri3)( conjc, \
|
||||
panel_dim, \
|
||||
panel_len, \
|
||||
kappa, \
|
||||
c, incc, ldc, \
|
||||
p, psp, ldp ); \
|
||||
PASTEMAC(ch,packm_cxk_3m)( conjc, \
|
||||
panel_dim, \
|
||||
panel_len, \
|
||||
kappa, \
|
||||
c, incc, ldc, \
|
||||
p, psp, ldp ); \
|
||||
\
|
||||
\
|
||||
/* Tweak the panel according to its triangular structure */ \
|
||||
{ \
|
||||
dim_t j = bli_abs( diagoffp ); \
|
||||
ctype_r* p11_r = ( ctype_r* )p + (j )*ldp; \
|
||||
ctype_r* p11_i = ( ctype_r* )p + psp + (j )*ldp; \
|
||||
ctype_r* p11_ri = ( ctype_r* )p + 2*psp + (j )*ldp; \
|
||||
dim_t j = bli_abs( diagoffp ); \
|
||||
ctype_r* p11_r = ( ctype_r* )p + (j )*ldp; \
|
||||
ctype_r* p11_i = ( ctype_r* )p + psp + (j )*ldp; \
|
||||
ctype_r* p11_rpi = ( ctype_r* )p + 2*psp + (j )*ldp; \
|
||||
\
|
||||
/* If the diagonal of c is implicitly unit, explicitly set the
|
||||
diagonal of the packed panel to kappa. */ \
|
||||
@@ -556,7 +556,7 @@ void PASTEMAC(ch,varname)( \
|
||||
m_panel, \
|
||||
n_panel, \
|
||||
&kappa_r, \
|
||||
p11_ri, rs_p11, cs_p11 ); \
|
||||
p11_rpi, rs_p11, cs_p11 ); \
|
||||
} \
|
||||
\
|
||||
/* If requested, invert the diagonal of the packed panel. Note
|
||||
@@ -609,7 +609,7 @@ void PASTEMAC(ch,varname)( \
|
||||
panel_dim, \
|
||||
panel_dim, \
|
||||
zero_r, \
|
||||
p11_ri, rs_p11, cs_p11 ); \
|
||||
p11_rpi, rs_p11, cs_p11 ); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
@@ -623,12 +623,12 @@ void PASTEMAC(ch,varname)( \
|
||||
different register blockings for the edge cases. */ \
|
||||
if ( m_panel != m_panel_max ) \
|
||||
{ \
|
||||
dim_t i = m_panel; \
|
||||
dim_t m_edge = m_panel_max - i; \
|
||||
dim_t n_edge = n_panel_max; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (i )*rs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (i )*rs_p; \
|
||||
ctype_r* p_edge_ri = ( ctype_r* )p + 2*psp + (i )*rs_p; \
|
||||
dim_t i = m_panel; \
|
||||
dim_t m_edge = m_panel_max - i; \
|
||||
dim_t n_edge = n_panel_max; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (i )*rs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (i )*rs_p; \
|
||||
ctype_r* p_edge_rpi = ( ctype_r* )p + 2*psp + (i )*rs_p; \
|
||||
\
|
||||
PASTEMAC(chr,setm)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
@@ -650,17 +650,17 @@ void PASTEMAC(ch,varname)( \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero_r, \
|
||||
p_edge_ri, rs_p, cs_p ); \
|
||||
p_edge_rpi, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
if ( n_panel != n_panel_max ) \
|
||||
{ \
|
||||
dim_t j = n_panel; \
|
||||
dim_t m_edge = m_panel_max; \
|
||||
dim_t n_edge = n_panel_max - j; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (j )*cs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (j )*cs_p; \
|
||||
ctype_r* p_edge_ri = ( ctype_r* )p + 2*psp + (j )*cs_p; \
|
||||
dim_t j = n_panel; \
|
||||
dim_t m_edge = m_panel_max; \
|
||||
dim_t n_edge = n_panel_max - j; \
|
||||
ctype_r* p_edge_r = ( ctype_r* )p + (j )*cs_p; \
|
||||
ctype_r* p_edge_i = ( ctype_r* )p + psp + (j )*cs_p; \
|
||||
ctype_r* p_edge_rpi = ( ctype_r* )p + 2*psp + (j )*cs_p; \
|
||||
\
|
||||
PASTEMAC(chr,setm)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
@@ -682,7 +682,7 @@ void PASTEMAC(ch,varname)( \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero_r, \
|
||||
p_edge_ri, rs_p, cs_p ); \
|
||||
p_edge_rpi, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
/* If this panel is an edge case in both panel dimension and length,
|
||||
@@ -716,5 +716,5 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_tri_cxk_ri3 )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_tri_cxk_3m )
|
||||
|
||||
|
||||
@@ -73,7 +73,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict p, inc_t rs_p, inc_t cs_p \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( packm_tri_cxk_ri )
|
||||
INSERT_GENTPROTCO_BASIC( packm_tri_cxk_4m )
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( packm_tri_cxk_ri3 )
|
||||
INSERT_GENTPROTCO_BASIC( packm_tri_cxk_3m )
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -43,12 +43,12 @@ void PASTEMAC(ch,varname)( \
|
||||
void* p, inc_t psp, inc_t ldp \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( packm_ref_2xk_ri )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_4xk_ri )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_6xk_ri )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_8xk_ri )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_10xk_ri )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_12xk_ri )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_14xk_ri )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_16xk_ri )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_2xk_3m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_4xk_3m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_6xk_3m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_8xk_3m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_10xk_3m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_12xk_3m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_14xk_3m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_16xk_3m )
|
||||
|
||||
@@ -116,7 +116,7 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_2xk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_2xk_4m )
|
||||
|
||||
|
||||
|
||||
@@ -210,7 +210,7 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_4xk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_4xk_4m )
|
||||
|
||||
|
||||
|
||||
@@ -312,7 +312,7 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_6xk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_6xk_4m )
|
||||
|
||||
|
||||
|
||||
@@ -422,7 +422,7 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_8xk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_8xk_4m )
|
||||
|
||||
|
||||
|
||||
@@ -540,7 +540,7 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_10xk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_10xk_4m )
|
||||
|
||||
|
||||
|
||||
@@ -666,7 +666,7 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_12xk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_12xk_4m )
|
||||
|
||||
|
||||
|
||||
@@ -800,7 +800,7 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_14xk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_14xk_4m )
|
||||
|
||||
|
||||
|
||||
@@ -942,5 +942,5 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_16xk_ri )
|
||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_16xk_4m )
|
||||
|
||||
@@ -43,12 +43,12 @@ void PASTEMAC(ch,varname)( \
|
||||
void* p, inc_t psp, inc_t ldp \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( packm_ref_2xk_ri3 )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_4xk_ri3 )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_6xk_ri3 )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_8xk_ri3 )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_10xk_ri3 )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_12xk_ri3 )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_14xk_ri3 )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_16xk_ri3 )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_2xk_4m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_4xk_4m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_6xk_4m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_8xk_4m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_10xk_4m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_12xk_4m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_14xk_4m )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_16xk_4m )
|
||||
|
||||
@@ -106,84 +106,84 @@
|
||||
// Level-1m
|
||||
//
|
||||
|
||||
// packm_2xk_ri3 kernels
|
||||
// packm_2xk_3m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_2XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_2XK_RI3_KERNEL BLIS_CPACKM_2XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_2XK_3M_KERNEL
|
||||
#define BLIS_CPACKM_2XK_3M_KERNEL BLIS_CPACKM_2XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_2XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_2XK_RI3_KERNEL BLIS_ZPACKM_2XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_2XK_3M_KERNEL
|
||||
#define BLIS_ZPACKM_2XK_3M_KERNEL BLIS_ZPACKM_2XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_4xk_ri3 kernels
|
||||
// packm_4xk_3m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_4XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_4XK_RI3_KERNEL BLIS_CPACKM_4XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_4XK_3M_KERNEL
|
||||
#define BLIS_CPACKM_4XK_3M_KERNEL BLIS_CPACKM_4XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_4XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_4XK_RI3_KERNEL BLIS_ZPACKM_4XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_4XK_3M_KERNEL
|
||||
#define BLIS_ZPACKM_4XK_3M_KERNEL BLIS_ZPACKM_4XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_6xk_ri3 kernels
|
||||
// packm_6xk_3m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_6XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_6XK_RI3_KERNEL BLIS_CPACKM_6XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_6XK_3M_KERNEL
|
||||
#define BLIS_CPACKM_6XK_3M_KERNEL BLIS_CPACKM_6XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_6XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_6XK_RI3_KERNEL BLIS_ZPACKM_6XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_6XK_3M_KERNEL
|
||||
#define BLIS_ZPACKM_6XK_3M_KERNEL BLIS_ZPACKM_6XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_8xk_ri3 kernels
|
||||
// packm_8xk_3m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_8XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_8XK_RI3_KERNEL BLIS_CPACKM_8XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_8XK_3M_KERNEL
|
||||
#define BLIS_CPACKM_8XK_3M_KERNEL BLIS_CPACKM_8XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_8XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_8XK_RI3_KERNEL BLIS_ZPACKM_8XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_8XK_3M_KERNEL
|
||||
#define BLIS_ZPACKM_8XK_3M_KERNEL BLIS_ZPACKM_8XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_10xk_ri3 kernels
|
||||
// packm_10xk_3m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_10XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_10XK_RI3_KERNEL BLIS_CPACKM_10XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_10XK_3M_KERNEL
|
||||
#define BLIS_CPACKM_10XK_3M_KERNEL BLIS_CPACKM_10XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_10XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_10XK_RI3_KERNEL BLIS_ZPACKM_10XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_10XK_3M_KERNEL
|
||||
#define BLIS_ZPACKM_10XK_3M_KERNEL BLIS_ZPACKM_10XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_12xk_ri3 kernels
|
||||
// packm_12xk_3m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_12XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_12XK_RI3_KERNEL BLIS_CPACKM_12XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_12XK_3M_KERNEL
|
||||
#define BLIS_CPACKM_12XK_3M_KERNEL BLIS_CPACKM_12XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_12XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_12XK_RI3_KERNEL BLIS_ZPACKM_12XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_12XK_3M_KERNEL
|
||||
#define BLIS_ZPACKM_12XK_3M_KERNEL BLIS_ZPACKM_12XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_14xk_ri3 kernels
|
||||
// packm_14xk_3m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_14XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_14XK_RI3_KERNEL BLIS_CPACKM_14XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_14XK_3M_KERNEL
|
||||
#define BLIS_CPACKM_14XK_3M_KERNEL BLIS_CPACKM_14XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_14XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_14XK_RI3_KERNEL BLIS_ZPACKM_14XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_14XK_3M_KERNEL
|
||||
#define BLIS_ZPACKM_14XK_3M_KERNEL BLIS_ZPACKM_14XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_16xk_ri3 kernels
|
||||
// packm_16xk_3m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_16XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_16XK_RI3_KERNEL BLIS_CPACKM_16XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_16XK_3M_KERNEL
|
||||
#define BLIS_CPACKM_16XK_3M_KERNEL BLIS_CPACKM_16XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_16XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_16XK_RI3_KERNEL BLIS_ZPACKM_16XK_RI3_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_16XK_3M_KERNEL
|
||||
#define BLIS_ZPACKM_16XK_3M_KERNEL BLIS_ZPACKM_16XK_3M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -106,84 +106,84 @@
|
||||
// Level-1m
|
||||
//
|
||||
|
||||
// packm_2xk_ri kernels
|
||||
// packm_2xk_4m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_2XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_2XK_RI_KERNEL BLIS_CPACKM_2XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_2XK_4M_KERNEL
|
||||
#define BLIS_CPACKM_2XK_4M_KERNEL BLIS_CPACKM_2XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_2XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_2XK_RI_KERNEL BLIS_ZPACKM_2XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_2XK_4M_KERNEL
|
||||
#define BLIS_ZPACKM_2XK_4M_KERNEL BLIS_ZPACKM_2XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_4xk_ri kernels
|
||||
// packm_4xk_4m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_4XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_4XK_RI_KERNEL BLIS_CPACKM_4XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_4XK_4M_KERNEL
|
||||
#define BLIS_CPACKM_4XK_4M_KERNEL BLIS_CPACKM_4XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_4XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_4XK_RI_KERNEL BLIS_ZPACKM_4XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_4XK_4M_KERNEL
|
||||
#define BLIS_ZPACKM_4XK_4M_KERNEL BLIS_ZPACKM_4XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_6xk_ri kernels
|
||||
// packm_6xk_4m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_6XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_6XK_RI_KERNEL BLIS_CPACKM_6XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_6XK_4M_KERNEL
|
||||
#define BLIS_CPACKM_6XK_4M_KERNEL BLIS_CPACKM_6XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_6XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_6XK_RI_KERNEL BLIS_ZPACKM_6XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_6XK_4M_KERNEL
|
||||
#define BLIS_ZPACKM_6XK_4M_KERNEL BLIS_ZPACKM_6XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_8xk_ri kernels
|
||||
// packm_8xk_4m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_8XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_8XK_RI_KERNEL BLIS_CPACKM_8XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_8XK_4M_KERNEL
|
||||
#define BLIS_CPACKM_8XK_4M_KERNEL BLIS_CPACKM_8XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_8XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_8XK_RI_KERNEL BLIS_ZPACKM_8XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_8XK_4M_KERNEL
|
||||
#define BLIS_ZPACKM_8XK_4M_KERNEL BLIS_ZPACKM_8XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_10xk_ri kernels
|
||||
// packm_10xk_4m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_10XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_10XK_RI_KERNEL BLIS_CPACKM_10XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_10XK_4M_KERNEL
|
||||
#define BLIS_CPACKM_10XK_4M_KERNEL BLIS_CPACKM_10XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_10XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_10XK_RI_KERNEL BLIS_ZPACKM_10XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_10XK_4M_KERNEL
|
||||
#define BLIS_ZPACKM_10XK_4M_KERNEL BLIS_ZPACKM_10XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_12xk_ri kernels
|
||||
// packm_12xk_4m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_12XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_12XK_RI_KERNEL BLIS_CPACKM_12XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_12XK_4M_KERNEL
|
||||
#define BLIS_CPACKM_12XK_4M_KERNEL BLIS_CPACKM_12XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_12XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_12XK_RI_KERNEL BLIS_ZPACKM_12XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_12XK_4M_KERNEL
|
||||
#define BLIS_ZPACKM_12XK_4M_KERNEL BLIS_ZPACKM_12XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_14xk_ri kernels
|
||||
// packm_14xk_4m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_14XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_14XK_RI_KERNEL BLIS_CPACKM_14XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_14XK_4M_KERNEL
|
||||
#define BLIS_CPACKM_14XK_4M_KERNEL BLIS_CPACKM_14XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_14XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_14XK_RI_KERNEL BLIS_ZPACKM_14XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_14XK_4M_KERNEL
|
||||
#define BLIS_ZPACKM_14XK_4M_KERNEL BLIS_ZPACKM_14XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_16xk_ri kernels
|
||||
// packm_16xk_4m kernels
|
||||
|
||||
#ifndef BLIS_CPACKM_16XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_16XK_RI_KERNEL BLIS_CPACKM_16XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_CPACKM_16XK_4M_KERNEL
|
||||
#define BLIS_CPACKM_16XK_4M_KERNEL BLIS_CPACKM_16XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_16XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_16XK_RI_KERNEL BLIS_ZPACKM_16XK_RI_KERNEL_REF
|
||||
#ifndef BLIS_ZPACKM_16XK_4M_KERNEL
|
||||
#define BLIS_ZPACKM_16XK_4M_KERNEL BLIS_ZPACKM_16XK_4M_KERNEL_REF
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -194,85 +194,85 @@
|
||||
#define BLIS_CPACKM_16XK_KERNEL_REF bli_cpackm_ref_16xk
|
||||
#define BLIS_ZPACKM_16XK_KERNEL_REF bli_zpackm_ref_16xk
|
||||
|
||||
// packm_2xk_ri kernels
|
||||
// packm_2xk_4m kernels
|
||||
|
||||
#define BLIS_CPACKM_2XK_RI_KERNEL_REF bli_cpackm_ref_2xk_ri
|
||||
#define BLIS_ZPACKM_2XK_RI_KERNEL_REF bli_zpackm_ref_2xk_ri
|
||||
#define BLIS_CPACKM_2XK_4M_KERNEL_REF bli_cpackm_ref_2xk_4m
|
||||
#define BLIS_ZPACKM_2XK_4M_KERNEL_REF bli_zpackm_ref_2xk_4m
|
||||
|
||||
// packm_4xk_ri kernels
|
||||
// packm_4xk_4m kernels
|
||||
|
||||
#define BLIS_CPACKM_4XK_RI_KERNEL_REF bli_cpackm_ref_4xk_ri
|
||||
#define BLIS_ZPACKM_4XK_RI_KERNEL_REF bli_zpackm_ref_4xk_ri
|
||||
#define BLIS_CPACKM_4XK_4M_KERNEL_REF bli_cpackm_ref_4xk_4m
|
||||
#define BLIS_ZPACKM_4XK_4M_KERNEL_REF bli_zpackm_ref_4xk_4m
|
||||
|
||||
// packm_6xk_ri kernels
|
||||
// packm_6xk_4m kernels
|
||||
|
||||
#define BLIS_CPACKM_6XK_RI_KERNEL_REF bli_cpackm_ref_6xk_ri
|
||||
#define BLIS_ZPACKM_6XK_RI_KERNEL_REF bli_zpackm_ref_6xk_ri
|
||||
#define BLIS_CPACKM_6XK_4M_KERNEL_REF bli_cpackm_ref_6xk_4m
|
||||
#define BLIS_ZPACKM_6XK_4M_KERNEL_REF bli_zpackm_ref_6xk_4m
|
||||
|
||||
// packm_8xk_ri kernels
|
||||
// packm_8xk_4m kernels
|
||||
|
||||
#define BLIS_CPACKM_8XK_RI_KERNEL_REF bli_cpackm_ref_8xk_ri
|
||||
#define BLIS_ZPACKM_8XK_RI_KERNEL_REF bli_zpackm_ref_8xk_ri
|
||||
#define BLIS_CPACKM_8XK_4M_KERNEL_REF bli_cpackm_ref_8xk_4m
|
||||
#define BLIS_ZPACKM_8XK_4M_KERNEL_REF bli_zpackm_ref_8xk_4m
|
||||
|
||||
// packm_10xk_ri kernels
|
||||
// packm_10xk_4m kernels
|
||||
|
||||
#define BLIS_CPACKM_10XK_RI_KERNEL_REF bli_cpackm_ref_10xk_ri
|
||||
#define BLIS_ZPACKM_10XK_RI_KERNEL_REF bli_zpackm_ref_10xk_ri
|
||||
#define BLIS_CPACKM_10XK_4M_KERNEL_REF bli_cpackm_ref_10xk_4m
|
||||
#define BLIS_ZPACKM_10XK_4M_KERNEL_REF bli_zpackm_ref_10xk_4m
|
||||
|
||||
// packm_12xk_ri kernels
|
||||
// packm_12xk_4m kernels
|
||||
|
||||
#define BLIS_CPACKM_12XK_RI_KERNEL_REF bli_cpackm_ref_12xk_ri
|
||||
#define BLIS_ZPACKM_12XK_RI_KERNEL_REF bli_zpackm_ref_12xk_ri
|
||||
#define BLIS_CPACKM_12XK_4M_KERNEL_REF bli_cpackm_ref_12xk_4m
|
||||
#define BLIS_ZPACKM_12XK_4M_KERNEL_REF bli_zpackm_ref_12xk_4m
|
||||
|
||||
// packm_14xk_ri kernels
|
||||
// packm_14xk_4m kernels
|
||||
|
||||
#define BLIS_CPACKM_14XK_RI_KERNEL_REF bli_cpackm_ref_14xk_ri
|
||||
#define BLIS_ZPACKM_14XK_RI_KERNEL_REF bli_zpackm_ref_14xk_ri
|
||||
#define BLIS_CPACKM_14XK_4M_KERNEL_REF bli_cpackm_ref_14xk_4m
|
||||
#define BLIS_ZPACKM_14XK_4M_KERNEL_REF bli_zpackm_ref_14xk_4m
|
||||
|
||||
// packm_16xk_ri kernels
|
||||
// packm_16xk_4m kernels
|
||||
|
||||
#define BLIS_CPACKM_16XK_RI_KERNEL_REF bli_cpackm_ref_16xk_ri
|
||||
#define BLIS_ZPACKM_16XK_RI_KERNEL_REF bli_zpackm_ref_16xk_ri
|
||||
#define BLIS_CPACKM_16XK_4M_KERNEL_REF bli_cpackm_ref_16xk_4m
|
||||
#define BLIS_ZPACKM_16XK_4M_KERNEL_REF bli_zpackm_ref_16xk_4m
|
||||
|
||||
// packm_2xk_ri3 kernels
|
||||
// packm_2xk_3m kernels
|
||||
|
||||
#define BLIS_CPACKM_2XK_RI3_KERNEL_REF bli_cpackm_ref_2xk_ri3
|
||||
#define BLIS_ZPACKM_2XK_RI3_KERNEL_REF bli_zpackm_ref_2xk_ri3
|
||||
#define BLIS_CPACKM_2XK_3M_KERNEL_REF bli_cpackm_ref_2xk_3m
|
||||
#define BLIS_ZPACKM_2XK_3M_KERNEL_REF bli_zpackm_ref_2xk_3m
|
||||
|
||||
// packm_4xk_ri3 kernels
|
||||
// packm_4xk_3m kernels
|
||||
|
||||
#define BLIS_CPACKM_4XK_RI3_KERNEL_REF bli_cpackm_ref_4xk_ri3
|
||||
#define BLIS_ZPACKM_4XK_RI3_KERNEL_REF bli_zpackm_ref_4xk_ri3
|
||||
#define BLIS_CPACKM_4XK_3M_KERNEL_REF bli_cpackm_ref_4xk_3m
|
||||
#define BLIS_ZPACKM_4XK_3M_KERNEL_REF bli_zpackm_ref_4xk_3m
|
||||
|
||||
// packm_6xk_ri3 kernels
|
||||
// packm_6xk_3m kernels
|
||||
|
||||
#define BLIS_CPACKM_6XK_RI3_KERNEL_REF bli_cpackm_ref_6xk_ri3
|
||||
#define BLIS_ZPACKM_6XK_RI3_KERNEL_REF bli_zpackm_ref_6xk_ri3
|
||||
#define BLIS_CPACKM_6XK_3M_KERNEL_REF bli_cpackm_ref_6xk_3m
|
||||
#define BLIS_ZPACKM_6XK_3M_KERNEL_REF bli_zpackm_ref_6xk_3m
|
||||
|
||||
// packm_8xk_ri3 kernels
|
||||
// packm_8xk_3m kernels
|
||||
|
||||
#define BLIS_CPACKM_8XK_RI3_KERNEL_REF bli_cpackm_ref_8xk_ri3
|
||||
#define BLIS_ZPACKM_8XK_RI3_KERNEL_REF bli_zpackm_ref_8xk_ri3
|
||||
#define BLIS_CPACKM_8XK_3M_KERNEL_REF bli_cpackm_ref_8xk_3m
|
||||
#define BLIS_ZPACKM_8XK_3M_KERNEL_REF bli_zpackm_ref_8xk_3m
|
||||
|
||||
// packm_10xk_ri3 kernels
|
||||
// packm_10xk_3m kernels
|
||||
|
||||
#define BLIS_CPACKM_10XK_RI3_KERNEL_REF bli_cpackm_ref_10xk_ri3
|
||||
#define BLIS_ZPACKM_10XK_RI3_KERNEL_REF bli_zpackm_ref_10xk_ri3
|
||||
#define BLIS_CPACKM_10XK_3M_KERNEL_REF bli_cpackm_ref_10xk_3m
|
||||
#define BLIS_ZPACKM_10XK_3M_KERNEL_REF bli_zpackm_ref_10xk_3m
|
||||
|
||||
// packm_12xk_ri3 kernels
|
||||
// packm_12xk_3m kernels
|
||||
|
||||
#define BLIS_CPACKM_12XK_RI3_KERNEL_REF bli_cpackm_ref_12xk_ri3
|
||||
#define BLIS_ZPACKM_12XK_RI3_KERNEL_REF bli_zpackm_ref_12xk_ri3
|
||||
#define BLIS_CPACKM_12XK_3M_KERNEL_REF bli_cpackm_ref_12xk_3m
|
||||
#define BLIS_ZPACKM_12XK_3M_KERNEL_REF bli_zpackm_ref_12xk_3m
|
||||
|
||||
// packm_14xk_ri3 kernels
|
||||
// packm_14xk_3m kernels
|
||||
|
||||
#define BLIS_CPACKM_14XK_RI3_KERNEL_REF bli_cpackm_ref_14xk_ri3
|
||||
#define BLIS_ZPACKM_14XK_RI3_KERNEL_REF bli_zpackm_ref_14xk_ri3
|
||||
#define BLIS_CPACKM_14XK_3M_KERNEL_REF bli_cpackm_ref_14xk_3m
|
||||
#define BLIS_ZPACKM_14XK_3M_KERNEL_REF bli_zpackm_ref_14xk_3m
|
||||
|
||||
// packm_16xk_ri3 kernels
|
||||
// packm_16xk_3m kernels
|
||||
|
||||
#define BLIS_CPACKM_16XK_RI3_KERNEL_REF bli_cpackm_ref_16xk_ri3
|
||||
#define BLIS_ZPACKM_16XK_RI3_KERNEL_REF bli_zpackm_ref_16xk_ri3
|
||||
#define BLIS_CPACKM_16XK_3M_KERNEL_REF bli_cpackm_ref_16xk_3m
|
||||
#define BLIS_ZPACKM_16XK_3M_KERNEL_REF bli_zpackm_ref_16xk_3m
|
||||
|
||||
// unpack_2xk kernels
|
||||
|
||||
|
||||
Reference in New Issue
Block a user