mirror of
https://github.com/amd/blis.git
synced 2026-04-30 12:31:11 +00:00
Minor packm kernel type cleanup (void* -> ctype*).
Details:
- Changed all void* function arguments in the reference packm kernels to pointers of the native type (ctype*). These pointers no longer need to be void* and are better represented by their native types anyway. (See below for details.) Updated the knl packm kernels accordingly.
- In the definition of the PACKM_KER_PROT prototype macro template in frame/1m/bli_l1m_ker_prot.h, changed the pointer types for kappa, a, and p from void* to ctype*. They were originally void* because these function signatures had to share the same type so that they could all be stored in a single array of that shared type, from which they were queried and called by packm_cxk(). This is no longer how the function pointers are stored, so it no longer makes sense to force the caller of packm kernels to use void* only so that the implementor of the packm kernels can typecast back to the native datatype within the kernel definition. This change has no effect internally within BLIS because currently all packm kernels are called after querying the function addresses from the context and then typecasting to the appropriate function pointer type, which is based upon type-specific pointer types such as float* and double*.
- Removed a comment in frame/1m/bli_l1m_ft_ker.h that was outdated and misleading due to changes to the handling of packm kernels since moving them into the context.
This commit is contained in:
@@ -72,11 +72,6 @@ INSERT_GENTDEF( packm )
|
||||
|
||||
// NOTE: the following macros generate packm kernel function type definitions
|
||||
// that are "ctyped" and void-typed, for each of the floating-point datatypes.
|
||||
// However, we will only make use of the void-typed definitions because the
|
||||
// functions such as bli_?packm_cxk() (currently) use arrays of function
|
||||
// pointers to store and access the function pointers for various unrolling
|
||||
// (register blocksize) values, and therefore they must all be of the same
|
||||
// type (hence the use of void* for kappa, a, and p).
|
||||
|
||||
// packm_ker
|
||||
|
||||
|
||||
@@ -48,9 +48,9 @@ void PASTEMAC(ch,varname) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
@@ -63,9 +63,9 @@ void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conja, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
@@ -80,9 +80,9 @@ void PASTEMAC(ch,varname) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
@@ -97,9 +97,9 @@ void PASTEMAC(ch,varname) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
@@ -115,9 +115,9 @@ void PASTEMAC(ch,varname) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
@@ -133,9 +133,9 @@ void PASTEMAC(ch,varname) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
|
||||
@@ -113,9 +113,9 @@ void bli_dpackm_knl_asm_8xk
|
||||
dim_t cdim_,
|
||||
dim_t n_,
|
||||
dim_t n_max_,
|
||||
void* restrict kappa_,
|
||||
void* restrict a_, inc_t inca_, inc_t lda_,
|
||||
void* restrict p_, inc_t ldp_,
|
||||
double* restrict kappa_,
|
||||
double* restrict a_, inc_t inca_, inc_t lda_,
|
||||
double* restrict p_, inc_t ldp_,
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
@@ -364,9 +364,9 @@ void bli_dpackm_knl_asm_24xk
|
||||
dim_t cdim_,
|
||||
dim_t n_,
|
||||
dim_t n_max_,
|
||||
void* restrict kappa_,
|
||||
void* restrict a_, inc_t inca_, inc_t lda_,
|
||||
void* restrict p_, inc_t ldp_,
|
||||
double* restrict kappa_,
|
||||
double* restrict a_, inc_t inca_, inc_t lda_,
|
||||
double* restrict p_, inc_t ldp_,
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
|
||||
@@ -115,9 +115,9 @@ void bli_spackm_knl_asm_16xk
|
||||
dim_t cdim_,
|
||||
dim_t n_,
|
||||
dim_t n_max_,
|
||||
void* restrict kappa_,
|
||||
void* restrict a_, inc_t inca_, inc_t lda_,
|
||||
void* restrict p_, inc_t ldp_,
|
||||
float* restrict kappa_,
|
||||
float* restrict a_, inc_t inca_, inc_t lda_,
|
||||
float* restrict p_, inc_t ldp_,
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
@@ -382,9 +382,9 @@ void bli_spackm_knl_asm_24xk
|
||||
dim_t cdim_,
|
||||
dim_t n_,
|
||||
dim_t n_max_,
|
||||
void* restrict kappa_,
|
||||
void* restrict a_, inc_t inca_, inc_t lda_,
|
||||
void* restrict p_, inc_t ldp_,
|
||||
float* restrict kappa_,
|
||||
float* restrict a_, inc_t inca_, inc_t lda_,
|
||||
float* restrict p_, inc_t ldp_,
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
|
||||
@@ -44,9 +44,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -58,12 +58,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -96,8 +95,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -108,8 +107,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -124,7 +123,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -132,7 +131,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -261,9 +260,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -275,12 +274,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -317,10 +315,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -331,10 +329,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -349,7 +347,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -357,7 +355,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -494,9 +492,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -508,12 +506,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -554,12 +551,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -570,12 +567,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -590,7 +587,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -598,7 +595,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -743,9 +740,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -757,12 +754,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -807,14 +803,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -825,14 +821,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -847,7 +843,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -855,7 +851,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1008,9 +1004,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -1022,12 +1018,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1076,16 +1071,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1096,16 +1091,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1120,7 +1115,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1128,7 +1123,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1289,9 +1284,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -1303,12 +1298,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1361,18 +1355,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1383,18 +1377,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1409,7 +1403,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1417,7 +1411,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1586,9 +1580,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -1600,12 +1594,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1662,20 +1655,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1686,20 +1679,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1714,7 +1707,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1722,7 +1715,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1899,9 +1892,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -1913,12 +1906,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1979,22 +1971,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -2005,22 +1997,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -2035,7 +2027,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -2043,7 +2035,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
|
||||
@@ -43,16 +43,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -63,7 +62,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -255,16 +254,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -275,7 +273,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -475,16 +473,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -495,7 +492,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -703,16 +700,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -723,7 +719,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -939,16 +935,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -959,7 +954,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1183,16 +1178,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1203,7 +1197,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1435,16 +1429,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1455,7 +1448,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1695,16 +1688,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1715,7 +1707,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
|
||||
@@ -43,16 +43,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -62,7 +61,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -192,16 +191,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -211,7 +209,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -349,16 +347,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -368,7 +365,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -514,16 +511,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -533,7 +529,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -687,16 +683,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -706,7 +701,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -868,16 +863,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -887,7 +881,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1057,16 +1051,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1076,7 +1069,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1254,16 +1247,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1273,7 +1265,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
|
||||
@@ -46,13 +46,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -64,7 +63,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -109,24 +108,24 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 11) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -136,18 +135,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 11) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -204,7 +203,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -237,18 +236,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -258,12 +257,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -332,13 +331,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -350,7 +348,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -419,36 +417,36 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 15) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 16) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 17) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 18) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 19) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 20) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 21) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 22) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 23) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 15) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 16) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 17) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 18) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 19) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 20) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 21) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 22) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 23) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -458,30 +456,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 15) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 16) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 17) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 18) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 19) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 20) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 21) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 22) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 23) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 15) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 16) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 17) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 18) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 19) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 20) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 21) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 22) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 23) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -538,7 +536,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -571,18 +569,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -592,12 +590,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
|
||||
@@ -44,13 +44,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -59,7 +58,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -108,8 +107,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -119,8 +118,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -193,13 +192,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -208,7 +206,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -263,9 +261,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -275,9 +273,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -350,13 +348,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -365,7 +362,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -416,10 +413,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -429,10 +426,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -505,19 +502,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -556,12 +552,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -571,12 +567,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -649,13 +645,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -664,7 +659,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -731,14 +726,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -748,14 +743,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -828,19 +823,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -887,16 +881,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -906,16 +900,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -988,19 +982,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1051,18 +1044,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1072,18 +1065,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1156,19 +1149,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1223,20 +1215,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1246,20 +1238,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1332,19 +1324,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1403,22 +1394,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1428,22 +1419,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1516,19 +1507,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1603,30 +1593,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +16*inca), *(pi1 +16) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +17*inca), *(pi1 +17) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +18*inca), *(pi1 +18) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +19*inca), *(pi1 +19) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +20*inca), *(pi1 +20) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +21*inca), *(pi1 +21) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +22*inca), *(pi1 +22) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +23*inca), *(pi1 +23) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +16*inca), *(pi1 +16) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +17*inca), *(pi1 +17) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +18*inca), *(pi1 +18) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +19*inca), *(pi1 +19) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +20*inca), *(pi1 +20) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +21*inca), *(pi1 +21) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +22*inca), *(pi1 +22) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +23*inca), *(pi1 +23) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1636,30 +1626,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +16*inca), *(pi1 +16) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +17*inca), *(pi1 +17) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +18*inca), *(pi1 +18) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +19*inca), *(pi1 +19) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +20*inca), *(pi1 +20) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +21*inca), *(pi1 +21) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +22*inca), *(pi1 +22) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +23*inca), *(pi1 +23) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +16*inca), *(pi1 +16) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +17*inca), *(pi1 +17) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +18*inca), *(pi1 +18) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +19*inca), *(pi1 +19) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +20*inca), *(pi1 +20) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +21*inca), *(pi1 +21) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +22*inca), *(pi1 +22) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +23*inca), *(pi1 +23) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -41,17 +41,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -82,8 +81,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -93,8 +92,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -115,17 +114,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -160,10 +158,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -173,10 +171,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -197,17 +195,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -246,12 +243,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -261,12 +258,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -287,17 +284,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -340,14 +336,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -357,14 +353,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -385,17 +381,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -442,16 +437,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -461,16 +456,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -491,17 +486,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -552,18 +546,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -573,18 +567,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -605,17 +599,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -670,20 +663,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -693,20 +686,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -727,17 +720,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -796,22 +788,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 14), *(alpha1 + 14*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 15), *(alpha1 + 15*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -821,22 +813,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 14), *(alpha1 + 14*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 15), *(alpha1 + 15*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
|
||||
Reference in New Issue
Block a user