mirror of
https://github.com/amd/blis.git
synced 2026-03-27 20:57:21 +00:00
Minor packm kernel type cleanup (void* -> ctype*).
Details:
- Changed all void* function arguments in reference packm kernels to those of the native type (ctype*). These pointers no longer need to be void* and are better represented by their native types anyway. (See below for details.) Updated knl packm kernels accordingly.
- In the definition of the PACKM_KER_PROT prototype macro template in frame/1m/bli_l1m_ker_prot.h, changed the pointer types for kappa, a, and p from void* to ctype*. They were originally void* because these function signatures had to share the same type so they could all be stored in a single array of that shared type, from which they were queried and called by packm_cxk(). This is no longer how the function pointers are stored, and so it no longer makes sense to force the caller of packm kernels to use void*, only so that the implementor of the packm kernels can typecast back to the native datatype within the kernel definition. This change has no effect internally within BLIS because currently all packm kernels are called after querying the function addresses from the context and then typecasting to the appropriate function pointer type, which is based upon type-specific function pointers like float* and double*.
- Removed a comment in frame/1m/bli_l1m_ft_ker.h that was outdated and misleading due to changes to the handling of packm kernels since moving them into the context.
This commit is contained in:
@@ -72,11 +72,6 @@ INSERT_GENTDEF( packm )
|
||||
|
||||
// NOTE: the following macros generate packm kernel function type definitions
|
||||
// that are "ctyped" and void-typed, for each of the floating-point datatypes.
|
||||
// However, we will only make use of the void-typed definitions because the
|
||||
// functions such as bli_?packm_cxk() (currently) use arrays of function
|
||||
// pointers to store and access the function pointers for various unrolling
|
||||
// (register blocksize) values, and therefore they must all be of the same
|
||||
// type (hence the use of void* for kappa, a, and p).
|
||||
|
||||
// packm_ker
|
||||
|
||||
|
||||
@@ -48,9 +48,9 @@ void PASTEMAC(ch,varname) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
@@ -63,9 +63,9 @@ void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conja, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
@@ -80,9 +80,9 @@ void PASTEMAC(ch,varname) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
@@ -97,9 +97,9 @@ void PASTEMAC(ch,varname) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
@@ -115,9 +115,9 @@ void PASTEMAC(ch,varname) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
@@ -133,9 +133,9 @@ void PASTEMAC(ch,varname) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
|
||||
@@ -113,9 +113,9 @@ void bli_dpackm_knl_asm_8xk
|
||||
dim_t cdim_,
|
||||
dim_t n_,
|
||||
dim_t n_max_,
|
||||
void* restrict kappa_,
|
||||
void* restrict a_, inc_t inca_, inc_t lda_,
|
||||
void* restrict p_, inc_t ldp_,
|
||||
double* restrict kappa_,
|
||||
double* restrict a_, inc_t inca_, inc_t lda_,
|
||||
double* restrict p_, inc_t ldp_,
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
@@ -364,9 +364,9 @@ void bli_dpackm_knl_asm_24xk
|
||||
dim_t cdim_,
|
||||
dim_t n_,
|
||||
dim_t n_max_,
|
||||
void* restrict kappa_,
|
||||
void* restrict a_, inc_t inca_, inc_t lda_,
|
||||
void* restrict p_, inc_t ldp_,
|
||||
double* restrict kappa_,
|
||||
double* restrict a_, inc_t inca_, inc_t lda_,
|
||||
double* restrict p_, inc_t ldp_,
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
|
||||
@@ -115,9 +115,9 @@ void bli_spackm_knl_asm_16xk
|
||||
dim_t cdim_,
|
||||
dim_t n_,
|
||||
dim_t n_max_,
|
||||
void* restrict kappa_,
|
||||
void* restrict a_, inc_t inca_, inc_t lda_,
|
||||
void* restrict p_, inc_t ldp_,
|
||||
float* restrict kappa_,
|
||||
float* restrict a_, inc_t inca_, inc_t lda_,
|
||||
float* restrict p_, inc_t ldp_,
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
@@ -382,9 +382,9 @@ void bli_spackm_knl_asm_24xk
|
||||
dim_t cdim_,
|
||||
dim_t n_,
|
||||
dim_t n_max_,
|
||||
void* restrict kappa_,
|
||||
void* restrict a_, inc_t inca_, inc_t lda_,
|
||||
void* restrict p_, inc_t ldp_,
|
||||
float* restrict kappa_,
|
||||
float* restrict a_, inc_t inca_, inc_t lda_,
|
||||
float* restrict p_, inc_t ldp_,
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
|
||||
@@ -44,9 +44,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -58,12 +58,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -96,8 +95,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -108,8 +107,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -124,7 +123,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -132,7 +131,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -261,9 +260,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -275,12 +274,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -317,10 +315,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -331,10 +329,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -349,7 +347,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -357,7 +355,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -494,9 +492,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -508,12 +506,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -554,12 +551,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -570,12 +567,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -590,7 +587,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -598,7 +595,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -743,9 +740,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -757,12 +754,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -807,14 +803,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -825,14 +821,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -847,7 +843,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -855,7 +851,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1008,9 +1004,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -1022,12 +1018,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1076,16 +1071,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1096,16 +1091,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1120,7 +1115,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1128,7 +1123,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1289,9 +1284,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -1303,12 +1298,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1361,18 +1355,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1383,18 +1377,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1409,7 +1403,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1417,7 +1411,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1586,9 +1580,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -1600,12 +1594,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1662,20 +1655,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1686,20 +1679,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -1714,7 +1707,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1722,7 +1715,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1899,9 +1892,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
@@ -1913,12 +1906,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda1 = lda; \
|
||||
const inc_t ldp1 = ldp; \
|
||||
\
|
||||
ctype* restrict kappa_cast = ( ctype* )kappa; \
|
||||
ctype* restrict alpha1_ri = ( ctype* )a; \
|
||||
ctype* restrict pi1_ri = ( ctype* )p; \
|
||||
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1979,22 +1971,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
|
||||
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -2005,22 +1997,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
|
||||
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
|
||||
\
|
||||
alpha1_ri += lda1; \
|
||||
pi1_ri += ldp1; \
|
||||
@@ -2035,7 +2027,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
const inc_t ldp2 = 2 * ldp; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype* kappa = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -2043,7 +2035,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
|
||||
@@ -43,16 +43,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -63,7 +62,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -71,7 +70,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -85,7 +84,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -102,7 +101,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -116,7 +115,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -255,16 +254,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -275,7 +273,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -285,7 +283,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -301,7 +299,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -320,7 +318,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -336,7 +334,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -475,16 +473,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -495,7 +492,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -507,7 +504,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -525,7 +522,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -546,7 +543,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -564,7 +561,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -703,16 +700,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -723,7 +719,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -737,7 +733,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -757,7 +753,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -780,7 +776,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -800,7 +796,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -939,16 +935,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -959,7 +954,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -975,7 +970,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -997,7 +992,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1022,7 +1017,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1044,7 +1039,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1183,16 +1178,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1203,7 +1197,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1221,7 +1215,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1245,7 +1239,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1272,7 +1266,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1296,7 +1290,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1435,16 +1429,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1455,7 +1448,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1475,7 +1468,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1501,7 +1494,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1530,7 +1523,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1556,7 +1549,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1695,16 +1688,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1715,7 +1707,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1737,7 +1729,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
|
||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1765,7 +1757,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
|
||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1796,7 +1788,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
|
||||
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1824,7 +1816,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
|
||||
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
|
||||
@@ -43,16 +43,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -62,7 +61,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -70,7 +69,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -83,7 +82,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -99,7 +98,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -112,7 +111,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -192,16 +191,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -211,7 +209,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -221,7 +219,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -236,7 +234,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -254,7 +252,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -269,7 +267,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -349,16 +347,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -368,7 +365,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -380,7 +377,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -397,7 +394,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -417,7 +414,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -434,7 +431,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -514,16 +511,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -533,7 +529,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -547,7 +543,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -566,7 +562,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -588,7 +584,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -607,7 +603,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -687,16 +683,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -706,7 +701,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -722,7 +717,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -743,7 +738,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -767,7 +762,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -788,7 +783,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -868,16 +863,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -887,7 +881,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -905,7 +899,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -928,7 +922,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -954,7 +948,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -977,7 +971,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1057,16 +1051,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1076,7 +1069,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1096,7 +1089,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1121,7 +1114,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1149,7 +1142,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1174,7 +1167,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1254,16 +1247,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t is_p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t is_p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
const inc_t inca2 = 2 * inca; \
|
||||
const inc_t lda2 = 2 * lda; \
|
||||
\
|
||||
ctype* kappa_cast = kappa; \
|
||||
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
|
||||
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
|
||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||
@@ -1273,7 +1265,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1295,7 +1287,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
|
||||
PASTEMAC(ch,copyjris)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1322,7 +1314,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
|
||||
PASTEMAC(ch,copyris)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1352,7 +1344,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
|
||||
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
@@ -1379,7 +1371,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
|
||||
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
|
||||
\
|
||||
\
|
||||
alpha1_r += lda2; \
|
||||
alpha1_i += lda2; \
|
||||
pi1_r += ldp; \
|
||||
|
||||
@@ -46,13 +46,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -64,7 +63,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -109,24 +108,24 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 11) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -136,18 +135,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 11) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -204,7 +203,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -237,18 +236,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -258,12 +257,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -332,13 +331,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -350,7 +348,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -419,36 +417,36 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 15) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 16) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 17) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 18) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 19) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 20) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 21) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 22) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 23) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 15) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 16) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 17) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 18) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 19) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 20) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 21) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 22) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 23) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -458,30 +456,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 15) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 16) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 17) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 18) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 19) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 20) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 21) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 22) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 23) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 15) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 16) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 17) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 18) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 19) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 20) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 21) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 22) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 23) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -538,7 +536,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -571,18 +569,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
|
||||
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -592,12 +590,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
|
||||
@@ -44,13 +44,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -59,7 +58,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -108,8 +107,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -119,8 +118,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -193,13 +192,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -208,7 +206,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -263,9 +261,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -275,9 +273,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -350,13 +348,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -365,7 +362,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -416,10 +413,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -429,10 +426,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -505,19 +502,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -556,12 +552,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -571,12 +567,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -649,13 +645,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
@@ -664,7 +659,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -731,14 +726,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -748,14 +743,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -828,19 +823,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -887,16 +881,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -906,16 +900,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -988,19 +982,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1051,18 +1044,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1072,18 +1065,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1156,19 +1149,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1223,20 +1215,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1246,20 +1238,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1332,19 +1324,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1403,22 +1394,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1428,22 +1419,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1516,19 +1507,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
dim_t cdim, \
|
||||
dim_t n, \
|
||||
dim_t n_max, \
|
||||
void* restrict kappa, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
if ( cdim == mnr ) \
|
||||
{ \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
@@ -1603,30 +1593,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +16*inca), *(pi1 +16) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +17*inca), *(pi1 +17) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +18*inca), *(pi1 +18) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +19*inca), *(pi1 +19) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +20*inca), *(pi1 +20) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +21*inca), *(pi1 +21) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +22*inca), *(pi1 +22) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +23*inca), *(pi1 +23) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +16*inca), *(pi1 +16) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +17*inca), *(pi1 +17) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +18*inca), *(pi1 +18) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +19*inca), *(pi1 +19) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +20*inca), *(pi1 +20) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +21*inca), *(pi1 +21) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +22*inca), *(pi1 +22) ); \
|
||||
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +23*inca), *(pi1 +23) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
@@ -1636,30 +1626,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( dim_t k = n; k != 0; --k ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +16*inca), *(pi1 +16) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +17*inca), *(pi1 +17) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +18*inca), *(pi1 +18) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +19*inca), *(pi1 +19) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +20*inca), *(pi1 +20) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +21*inca), *(pi1 +21) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +22*inca), *(pi1 +22) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +23*inca), *(pi1 +23) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +16*inca), *(pi1 +16) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +17*inca), *(pi1 +17) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +18*inca), *(pi1 +18) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +19*inca), *(pi1 +19) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +20*inca), *(pi1 +20) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +21*inca), *(pi1 +21) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +22*inca), *(pi1 +22) ); \
|
||||
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +23*inca), *(pi1 +23) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += ldp; \
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -41,17 +41,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -82,8 +81,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -93,8 +92,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -115,17 +114,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -160,10 +158,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -173,10 +171,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -197,17 +195,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -246,12 +243,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -261,12 +258,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -287,17 +284,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -340,14 +336,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -357,14 +353,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -385,17 +381,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -442,16 +437,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -461,16 +456,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -491,17 +486,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -552,18 +546,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -573,18 +567,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -605,17 +599,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -670,20 +663,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -693,20 +686,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -727,17 +720,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t n, \
|
||||
void* restrict kappa, \
|
||||
void* restrict p, inc_t ldp, \
|
||||
void* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict kappa, \
|
||||
ctype* restrict p, inc_t ldp, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
|
||||
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
|
||||
{ \
|
||||
if ( bli_is_conj( conjp ) ) \
|
||||
{ \
|
||||
@@ -796,22 +788,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 14), *(alpha1 + 14*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 15), *(alpha1 + 15*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
@@ -821,22 +813,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
{ \
|
||||
for ( ; n != 0; --n ) \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 14), *(alpha1 + 14*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 15), *(alpha1 + 15*inca) ); \
|
||||
\
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
|
||||
Reference in New Issue
Block a user