Fixed accidental breakage in 645d771.

Details:
- In trying to clean up kappa_cast variables in the reference packm
  kernels, which I initially believed to be redundant given the other
  void* -> ctype* changes in 645d771, I accidentally ended up violating
  restrict semantics for 1e/1r packing and possibly other packm kernels.
  (Normally, my pre-commit testsuite run would have caught this, but I
  was unknowingly using an edited input.operations file in which I'd
  disabled most tests as part of unrelated work.) This commit reverts
  the kappa_cast changes in 645d771.
This commit is contained in:
Field G. Van Zee
2020-09-12 17:00:47 -05:00
parent 645d771a14
commit 8ebb3b60e1
7 changed files with 1332 additions and 1280 deletions

View File

@@ -58,11 +58,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -95,8 +96,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -107,8 +108,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -123,7 +124,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa = kappa; \
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -131,7 +132,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -274,11 +275,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -315,10 +317,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -329,10 +331,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -347,7 +349,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa = kappa; \
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -355,7 +357,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -506,11 +508,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -551,12 +554,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -567,12 +570,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -587,7 +590,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa = kappa; \
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -595,7 +598,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -754,11 +757,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -803,14 +807,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -821,14 +825,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -843,7 +847,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa = kappa; \
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -851,7 +855,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1018,11 +1022,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1071,16 +1076,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1091,16 +1096,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1115,7 +1120,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa = kappa; \
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1123,7 +1128,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1298,11 +1303,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1355,18 +1361,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1377,18 +1383,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1403,7 +1409,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa = kappa; \
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1411,7 +1417,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1594,11 +1600,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1655,20 +1662,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1679,20 +1686,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1707,7 +1714,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa = kappa; \
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1715,7 +1722,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1906,11 +1913,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda1 = lda; \
const inc_t ldp1 = ldp; \
\
ctype* restrict kappa_cast = ( ctype* )kappa; \
ctype* restrict alpha1_ri = ( ctype* )a; \
ctype* restrict pi1_ri = ( ctype* )p; \
ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1971,22 +1979,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
PASTEMAC(ch,scal2j1es)( *kappa, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -1997,22 +2005,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
PASTEMAC(ch,scal21es)( *kappa, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \
PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \
\
alpha1_ri += lda1; \
pi1_ri += ldp1; \
@@ -2027,7 +2035,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t lda2 = 2 * lda; \
const inc_t ldp2 = 2 * ldp; \
\
ctype* kappa = kappa; \
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -2035,7 +2043,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype_r* restrict pi1_r = ( ctype_r* )p; \
ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \

View File

@@ -52,6 +52,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -62,7 +63,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -70,7 +71,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -84,7 +85,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -101,7 +102,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -115,7 +116,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -263,6 +264,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -273,7 +275,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -283,7 +285,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -299,7 +301,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -318,7 +320,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -334,7 +336,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -482,6 +484,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -492,7 +495,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -504,7 +507,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -522,7 +525,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -543,7 +546,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -561,7 +564,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -709,6 +712,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -719,7 +723,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -733,7 +737,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -753,7 +757,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -776,7 +780,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -796,7 +800,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -944,6 +948,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -954,7 +959,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -970,7 +975,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -992,7 +997,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1017,7 +1022,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1039,7 +1044,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1187,6 +1192,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1197,7 +1203,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1215,7 +1221,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1239,7 +1245,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1266,7 +1272,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1290,7 +1296,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1438,6 +1444,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1448,7 +1455,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1468,7 +1475,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1494,7 +1501,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1523,7 +1530,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1549,7 +1556,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1697,6 +1704,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1707,7 +1715,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1729,7 +1737,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
PASTEMAC(ch,copyjri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1757,7 +1765,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
PASTEMAC(ch,copyri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1788,7 +1796,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1816,7 +1824,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \
PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \

View File

@@ -52,6 +52,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -61,7 +62,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -69,7 +70,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -82,7 +83,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -98,7 +99,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -111,7 +112,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -200,6 +201,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -209,7 +211,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -219,7 +221,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -234,7 +236,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -252,7 +254,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -267,7 +269,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -356,6 +358,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -365,7 +368,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -377,7 +380,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -394,7 +397,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -414,7 +417,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -431,7 +434,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -520,6 +523,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -529,7 +533,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -543,7 +547,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -562,7 +566,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -584,7 +588,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -603,7 +607,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -692,6 +696,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -701,7 +706,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -717,7 +722,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -738,7 +743,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -762,7 +767,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -783,7 +788,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -872,6 +877,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -881,7 +887,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -899,7 +905,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -922,7 +928,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -948,7 +954,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -971,7 +977,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1060,6 +1066,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1069,7 +1076,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1089,7 +1096,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1114,7 +1121,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1142,7 +1149,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1167,7 +1174,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1256,6 +1263,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
const inc_t inca2 = 2 * inca; \
const inc_t lda2 = 2 * lda; \
\
ctype* kappa_cast = kappa; \
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \
ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
@@ -1265,7 +1273,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1287,7 +1295,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
PASTEMAC(ch,copyjris)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1314,7 +1322,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
PASTEMAC(ch,copyris)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1344,7 +1352,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \
@@ -1371,7 +1379,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
\
\
alpha1_r += lda2; \
alpha1_i += lda2; \
pi1_r += ldp; \

View File

@@ -52,6 +52,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -63,7 +64,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -108,24 +109,24 @@ void PASTEMAC3(ch,opname,arch,suf) \
} \
} \
} \
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
{ \
if ( bli_is_conj( conja ) ) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 11) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -135,18 +136,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 11) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -203,7 +204,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -236,18 +237,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
} \
} \
} \
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
{ \
if ( bli_is_conj( conja ) ) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -257,12 +258,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -337,6 +338,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -348,7 +350,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -417,36 +419,36 @@ void PASTEMAC3(ch,opname,arch,suf) \
} \
} \
} \
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
{ \
if ( bli_is_conj( conja ) ) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 12) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 13) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 14) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 15) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 16) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 17) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 18) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 19) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 20) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 21) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 22) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 23) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 12) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 13) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 14) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 15) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 16) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 17) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 18) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 19) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 20) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 21) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 22) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 23) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -456,30 +458,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 12) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 13) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 14) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 15) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 16) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 17) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 18) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 19) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 20) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 21) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 22) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 23) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 11) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 12) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 13) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 14) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 15) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 16) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 17) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 18) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 19) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 20) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 21) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 22) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 23) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -536,7 +538,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -569,18 +571,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
} \
} \
} \
else /* if ( !PASTEMAC(ch,eq1)( *kappa ) ) */ \
else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \
{ \
if ( bli_is_conj( conja ) ) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -590,12 +592,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \

View File

@@ -50,6 +50,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -58,7 +59,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -107,8 +108,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -118,8 +119,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -198,6 +199,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -206,7 +208,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -261,9 +263,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -273,9 +275,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -354,6 +356,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -362,7 +365,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -413,10 +416,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -426,10 +429,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -508,12 +511,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -552,12 +556,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -567,12 +571,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -651,6 +655,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
@@ -659,7 +664,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -726,14 +731,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -743,14 +748,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -829,12 +834,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -881,16 +887,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -900,16 +906,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -988,12 +994,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1044,18 +1051,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1065,18 +1072,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1155,12 +1162,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1215,20 +1223,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1238,20 +1246,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1330,12 +1338,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1394,22 +1403,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1419,22 +1428,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1513,12 +1522,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
if ( cdim == mnr ) \
{ \
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conja ) ) \
{ \
@@ -1593,30 +1603,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +16*inca), *(pi1 +16) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +17*inca), *(pi1 +17) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +18*inca), *(pi1 +18) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +19*inca), *(pi1 +19) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +20*inca), *(pi1 +20) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +21*inca), *(pi1 +21) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +22*inca), *(pi1 +22) ); \
PASTEMAC(ch,scal2js)( *kappa, *(alpha1 +23*inca), *(pi1 +23) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +16*inca), *(pi1 +16) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +17*inca), *(pi1 +17) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +18*inca), *(pi1 +18) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +19*inca), *(pi1 +19) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +20*inca), *(pi1 +20) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +21*inca), *(pi1 +21) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +22*inca), *(pi1 +22) ); \
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +23*inca), *(pi1 +23) ); \
\
alpha1 += lda; \
pi1 += ldp; \
@@ -1626,30 +1636,30 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( dim_t k = n; k != 0; --k ) \
{ \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +16*inca), *(pi1 +16) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +17*inca), *(pi1 +17) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +18*inca), *(pi1 +18) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +19*inca), *(pi1 +19) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +20*inca), *(pi1 +20) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +21*inca), *(pi1 +21) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +22*inca), *(pi1 +22) ); \
PASTEMAC(ch,scal2s)( *kappa, *(alpha1 +23*inca), *(pi1 +23) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +16*inca), *(pi1 +16) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +17*inca), *(pi1 +17) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +18*inca), *(pi1 +18) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +19*inca), *(pi1 +19) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +20*inca), *(pi1 +20) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +21*inca), *(pi1 +21) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +22*inca), *(pi1 +22) ); \
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +23*inca), *(pi1 +23) ); \
\
alpha1 += lda; \
pi1 += ldp; \

File diff suppressed because it is too large Load Diff

View File

@@ -41,16 +41,17 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -81,8 +82,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -92,8 +93,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -114,16 +115,17 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -158,10 +160,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -171,10 +173,10 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -195,16 +197,17 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -243,12 +246,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -258,12 +261,12 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -284,16 +287,17 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -336,14 +340,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -353,14 +357,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -381,16 +385,17 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -437,16 +442,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -456,16 +461,16 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -486,16 +491,17 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -546,18 +552,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -567,18 +573,18 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -599,16 +605,17 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -663,20 +670,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -686,20 +693,20 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -720,16 +727,17 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
conj_t conjp, \
dim_t n, \
ctype* restrict kappa, \
ctype* restrict p, inc_t ldp, \
ctype* restrict a, inc_t inca, inc_t lda, \
void* restrict kappa, \
void* restrict p, inc_t ldp, \
void* restrict a, inc_t inca, inc_t lda, \
cntx_t* restrict cntx \
) \
{ \
ctype* restrict kappa_cast = kappa; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
\
if ( PASTEMAC(ch,eq1)( *kappa ) ) \
if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
{ \
if ( bli_is_conj( conjp ) ) \
{ \
@@ -788,22 +796,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 14), *(alpha1 + 14*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa, *(pi1 + 15), *(alpha1 + 15*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \
@@ -813,22 +821,22 @@ void PASTEMAC3(ch,opname,arch,suf) \
{ \
for ( ; n != 0; --n ) \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 14), *(alpha1 + 14*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa, *(pi1 + 15), *(alpha1 + 15*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
\
pi1 += ldp; \
alpha1 += lda; \