Minor packm kernel type cleanup (void* -> ctype*).

Details:
- Changed all void* function arguments in reference packm kernels to
  those of the native type (ctype*). These pointers no longer need to
  be void* and are better represented by their native types anyway.
  (See below for details.) Updated knl packm kernels accordingly.
- In the definition of the PACKM_KER_PROT prototype macro template in
  frame/1m/bli_l1m_ker_prot.h, changed the pointer types for kappa, a,
  and p from void* to ctype*. They were originally void* because these
  function signatures had to share the same type so they could all be
  stored in a single array of that shared type, from which they were
  queried and called by packm_cxk(). This is no longer how the function
  pointers are stored, and so it no longer makes sense to force the
  caller of packm kernels to use void*, only so that the implementor
  of the packm kernels can typecast back to the native datatype within
  the kernel definition. This change has no effect internally within
  BLIS because currently all packm kernels are called after querying
  the function addresses from the context and then typecasting to the
  appropriate function pointer type, whose signature is based upon
  type-specific data pointers like float* and double*.
- Removed a comment in frame/1m/bli_l1m_ft_ker.h that was outdated and
  misleading due to changes to the handling of packm kernels since
  moving them into the context.
This commit is contained in:
Field G. Van Zee
2020-09-12 15:31:56 -05:00
parent 54bf6c3554
commit 645d771a14
11 changed files with 1442 additions and 1499 deletions

View File

@@ -72,11 +72,6 @@ INSERT_GENTDEF( packm )
// NOTE: the following macros generate packm kernel function type definitions
// that are "ctyped" and void-typed, for each of the floating-point datatypes.
// However, we will only make use of the void-typed definitions because the
// functions such as bli_?packm_cxk() (currently) use arrays of function
// pointers to store and access the function pointers for various unrolling
// (register blocksize) values, and therefore they must all be of the same
// type (hence the use of void* for kappa, a, and p).
// packm_ker

View File

@@ -48,9 +48,9 @@ void PASTEMAC(ch,varname) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
);
@@ -63,9 +63,9 @@ void PASTEMAC(ch,varname) \
( \
conj_t conja, \
dim_t n, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
);
@@ -80,9 +80,9 @@ void PASTEMAC(ch,varname) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
);
@@ -97,9 +97,9 @@ void PASTEMAC(ch,varname) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
);
@@ -115,9 +115,9 @@ void PASTEMAC(ch,varname) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
);
@@ -133,9 +133,9 @@ void PASTEMAC(ch,varname) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
);

View File

@@ -113,9 +113,9 @@ void bli_dpackm_knl_asm_8xk
dim_t cdim_,
dim_t n_,
dim_t n_max_,
void* restrict kappa_,
void* restrict a_, inc_t inca_, inc_t lda_,
void* restrict p_, inc_t ldp_,
double* restrict kappa_,
double* restrict a_, inc_t inca_, inc_t lda_,
double* restrict p_, inc_t ldp_,
cntx_t* restrict cntx
)
{
@@ -364,9 +364,9 @@ void bli_dpackm_knl_asm_24xk
dim_t cdim_,
dim_t n_,
dim_t n_max_,
void* restrict kappa_,
void* restrict a_, inc_t inca_, inc_t lda_,
void* restrict p_, inc_t ldp_,
double* restrict kappa_,
double* restrict a_, inc_t inca_, inc_t lda_,
double* restrict p_, inc_t ldp_,
cntx_t* restrict cntx
)
{

View File

@@ -115,9 +115,9 @@ void bli_spackm_knl_asm_16xk
dim_t cdim_,
dim_t n_,
dim_t n_max_,
void* restrict kappa_,
void* restrict a_, inc_t inca_, inc_t lda_,
void* restrict p_, inc_t ldp_,
float* restrict kappa_,
float* restrict a_, inc_t inca_, inc_t lda_,
float* restrict p_, inc_t ldp_,
cntx_t* restrict cntx
)
{
@@ -382,9 +382,9 @@ void bli_spackm_knl_asm_24xk
dim_t cdim_,
dim_t n_,
dim_t n_max_,
void* restrict kappa_,
void* restrict a_, inc_t inca_, inc_t lda_,
void* restrict p_, inc_t ldp_,
float* restrict kappa_,
float* restrict a_, inc_t inca_, inc_t lda_,
float* restrict p_, inc_t ldp_,
cntx_t* restrict cntx
)
{

View File

@@ -44,9 +44,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
@@ -58,12 +58,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -96,8 +95,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -108,8 +107,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -124,7 +123,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa_cast = kappa; \
ctype* kappa = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -132,7 +131,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -261,9 +260,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
@@ -275,12 +274,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -317,10 +315,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -331,10 +329,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -349,7 +347,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa_cast = kappa; \
ctype* kappa = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -357,7 +355,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -494,9 +492,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
@@ -508,12 +506,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -554,12 +551,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -570,12 +567,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -590,7 +587,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa_cast = kappa; \
ctype* kappa = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -598,7 +595,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -743,9 +740,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
@@ -757,12 +754,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -807,14 +803,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -825,14 +821,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -847,7 +843,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa_cast = kappa; \
ctype* kappa = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -855,7 +851,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1008,9 +1004,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
@@ -1022,12 +1018,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1076,16 +1071,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1096,16 +1091,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1120,7 +1115,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa_cast = kappa; \
ctype* kappa = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1128,7 +1123,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1289,9 +1284,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
@@ -1303,12 +1298,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1361,18 +1355,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1383,18 +1377,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1409,7 +1403,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa_cast = kappa; \
ctype* kappa = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1417,7 +1411,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1586,9 +1580,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
@@ -1600,12 +1594,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1662,20 +1655,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1686,20 +1679,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1714,7 +1707,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa_cast = kappa; \
ctype* kappa = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1722,7 +1715,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1899,9 +1892,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
@@ -1913,12 +1906,11 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1979,22 +1971,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -2005,22 +1997,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -2035,7 +2027,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa_cast = kappa; \
ctype* kappa = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -2043,7 +2035,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \

View File

@@ -43,16 +43,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -63,7 +62,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -71,7 +70,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -85,7 +84,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -102,7 +101,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -116,7 +115,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -255,16 +254,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -275,7 +273,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -285,7 +283,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -301,7 +299,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -320,7 +318,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -336,7 +334,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -475,16 +473,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -495,7 +492,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -507,7 +504,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -525,7 +522,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -546,7 +543,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -564,7 +561,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -703,16 +700,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -723,7 +719,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -737,7 +733,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -757,7 +753,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -780,7 +776,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -800,7 +796,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -939,16 +935,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -959,7 +954,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -975,7 +970,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -997,7 +992,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1022,7 +1017,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1044,7 +1039,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1183,16 +1178,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1203,7 +1197,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1221,7 +1215,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1245,7 +1239,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1272,7 +1266,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1296,7 +1290,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1435,16 +1429,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1455,7 +1448,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1475,7 +1468,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1501,7 +1494,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1530,7 +1523,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1556,7 +1549,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1695,16 +1688,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1715,7 +1707,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1737,7 +1729,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1765,7 +1757,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1796,7 +1788,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1824,7 +1816,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \

View File

@@ -43,16 +43,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -62,7 +61,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -70,7 +69,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -83,7 +82,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -99,7 +98,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -112,7 +111,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -192,16 +191,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -211,7 +209,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -221,7 +219,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -236,7 +234,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -254,7 +252,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -269,7 +267,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -349,16 +347,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -368,7 +365,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -380,7 +377,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -397,7 +394,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -417,7 +414,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -434,7 +431,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -514,16 +511,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -533,7 +529,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -547,7 +543,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -566,7 +562,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -588,7 +584,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -607,7 +603,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -687,16 +683,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -706,7 +701,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -722,7 +717,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -743,7 +738,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -767,7 +762,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -788,7 +783,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -868,16 +863,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -887,7 +881,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -905,7 +899,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -928,7 +922,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -954,7 +948,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -977,7 +971,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1057,16 +1051,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1076,7 +1069,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1096,7 +1089,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1121,7 +1114,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1149,7 +1142,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1174,7 +1167,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1254,16 +1247,15 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t is_p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t is_p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1273,7 +1265,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1295,7 +1287,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1322,7 +1314,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1352,7 +1344,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1379,7 +1371,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \

View File

@@ -46,13 +46,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -64,7 +63,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -109,24 +108,24 @@ void PASTEMAC3(ch,opname,arch,suf) \
} \
} \
} \
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
{ \
if ( bli_is_conj( conja ) ) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 11) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -136,18 +135,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 11) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -204,7 +203,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -237,18 +236,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
} \
} \
} \
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
{ \
if ( bli_is_conj( conja ) ) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -258,12 +257,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -332,13 +331,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -350,7 +348,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -419,36 +417,36 @@ void PASTEMAC3(ch,opname,arch,suf) \
} \
} \
} \
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
{ \
if ( bli_is_conj( conja ) ) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 12) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 13) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 14) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 15) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 16) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 17) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 18) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 19) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 20) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 21) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 22) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 23) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 12) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 13) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 14) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 15) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 16) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 17) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 18) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 19) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 20) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 21) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 22) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 23) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -458,30 +456,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 12) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 13) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 14) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 15) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 16) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 17) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 18) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 19) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 20) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 21) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 22) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 23) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 12) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 13) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 14) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 15) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 16) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 17) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 18) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 19) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 20) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 21) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 22) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 23) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -538,7 +536,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -571,18 +569,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
} \
} \
} \
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
{ \
if ( bli_is_conj( conja ) ) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -592,12 +590,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \

View File

@@ -44,13 +44,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -59,7 +58,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -108,8 +107,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -119,8 +118,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -193,13 +192,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -208,7 +206,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -263,9 +261,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -275,9 +273,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -350,13 +348,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -365,7 +362,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -416,10 +413,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -429,10 +426,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -505,19 +502,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -556,12 +552,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -571,12 +567,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -649,13 +645,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -664,7 +659,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -731,14 +726,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -748,14 +743,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -828,19 +823,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -887,16 +881,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -906,16 +900,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -988,19 +982,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1051,18 +1044,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1072,18 +1065,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1156,19 +1149,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1223,20 +1215,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1246,20 +1238,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1332,19 +1324,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1403,22 +1394,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1428,22 +1419,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1516,19 +1507,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
dim_t cdim, \
dim_t n, \
dim_t n_max, \
void* restrict kappa, \
void* restrict a, inc_t inca, inc_t lda, \
void* restrict p, inc_t ldp, \
ctype* restrict kappa, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict p, inc_t ldp, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1603,30 +1593,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +16*inca), *(pi1 +16) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +17*inca), *(pi1 +17) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +18*inca), *(pi1 +18) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +19*inca), *(pi1 +19) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +20*inca), *(pi1 +20) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +21*inca), *(pi1 +21) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +22*inca), *(pi1 +22) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +23*inca), *(pi1 +23) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +16*inca), *(pi1 +16) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +17*inca), *(pi1 +17) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +18*inca), *(pi1 +18) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +19*inca), *(pi1 +19) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +20*inca), *(pi1 +20) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +21*inca), *(pi1 +21) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +22*inca), *(pi1 +22) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +23*inca), *(pi1 +23) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1636,30 +1626,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +16*inca), *(pi1 +16) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +17*inca), *(pi1 +17) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +18*inca), *(pi1 +18) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +19*inca), *(pi1 +19) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +20*inca), *(pi1 +20) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +21*inca), *(pi1 +21) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +22*inca), *(pi1 +22) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +23*inca), *(pi1 +23) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +16*inca), *(pi1 +16) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +17*inca), *(pi1 +17) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +18*inca), *(pi1 +18) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +19*inca), *(pi1 +19) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +20*inca), *(pi1 +20) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +21*inca), *(pi1 +21) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +22*inca), *(pi1 +22) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +23*inca), *(pi1 +23) ); \
\
alpha1 += lda; \
pi1 += ldp; \

File diff suppressed because it is too large Load Diff

View File

@@ -41,17 +41,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -82,8 +81,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -93,8 +92,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -115,17 +114,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -160,10 +158,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -173,10 +171,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -197,17 +195,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -246,12 +243,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -261,12 +258,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -287,17 +284,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -340,14 +336,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -357,14 +353,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -385,17 +381,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -442,16 +437,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -461,16 +456,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -491,17 +486,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -552,18 +546,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -573,18 +567,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -605,17 +599,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -670,20 +663,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -693,20 +686,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -727,17 +720,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -796,22 +788,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 14), *(alpha1 + 14*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 15), *(alpha1 + 15*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -821,22 +813,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 14), *(alpha1 + 14*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 15), *(alpha1 + 15*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \