mirror of
https://github.com/amd/blis.git
synced 2026-05-13 10:35:38 +00:00
Reorganized packm ukernels.
Details: - Previously, packm micro-kernels were organized by the register blocksize (panel dimension) that each kernel assumes, so the conventional, ri, and ri3 variants of a given micro-kernel size were housed in the same file. This commit reorganizes the micro-kernels so that, for each format type (conventional, ri, and ri3), kernels of all sizes reside in a single file.
This commit is contained in:
@@ -53,12 +53,3 @@
|
|||||||
#include "bli_packm_cxk_ri.h"
|
#include "bli_packm_cxk_ri.h"
|
||||||
#include "bli_packm_cxk_ri3.h"
|
#include "bli_packm_cxk_ri3.h"
|
||||||
|
|
||||||
#include "bli_packm_ref_2xk.h"
|
|
||||||
#include "bli_packm_ref_4xk.h"
|
|
||||||
#include "bli_packm_ref_6xk.h"
|
|
||||||
#include "bli_packm_ref_8xk.h"
|
|
||||||
#include "bli_packm_ref_10xk.h"
|
|
||||||
#include "bli_packm_ref_12xk.h"
|
|
||||||
#include "bli_packm_ref_14xk.h"
|
|
||||||
#include "bli_packm_ref_16xk.h"
|
|
||||||
|
|
||||||
|
|||||||
@@ -32,6 +32,9 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "bli_packm_ref_cxk.h"
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROT
|
#undef GENTPROT
|
||||||
#define GENTPROT( ctype, ch, varname ) \
|
#define GENTPROT( ctype, ch, varname ) \
|
||||||
\
|
\
|
||||||
|
|||||||
@@ -32,6 +32,9 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "bli_packm_ref_cxk_ri.h"
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROTCO
|
#undef GENTPROTCO
|
||||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
|
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
\
|
\
|
||||||
|
|||||||
@@ -32,6 +32,9 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "bli_packm_ref_cxk_ri3.h"
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROTCO
|
#undef GENTPROTCO
|
||||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
|
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
\
|
\
|
||||||
|
|||||||
@@ -1,378 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "blis.h"
|
|
||||||
|
|
||||||
#undef GENTFUNC
|
|
||||||
#define GENTFUNC( ctype, ch, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
ctype* restrict beta_cast = beta; \
|
|
||||||
ctype* restrict alpha1 = a; \
|
|
||||||
ctype* restrict pi1 = p; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNC_BASIC0( packm_ref_10xk )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_10xk_ri )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_10xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#undef GENTPROT
|
|
||||||
#define GENTPROT( ctype, ch, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT_GENTPROT_BASIC( packm_ref_10xk )
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROTCO
|
|
||||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_10xk_ri )
|
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_10xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,402 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "blis.h"
|
|
||||||
|
|
||||||
#undef GENTFUNC
|
|
||||||
#define GENTFUNC( ctype, ch, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
ctype* restrict beta_cast = beta; \
|
|
||||||
ctype* restrict alpha1 = a; \
|
|
||||||
ctype* restrict pi1 = p; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 +10*inca), *(pi1 +10) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 +11*inca), *(pi1 +11) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 +10*inca), *(pi1 +10) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 +11*inca), *(pi1 +11) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNC_BASIC0( packm_ref_12xk )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_12xk_ri )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_12xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#undef GENTPROT
|
|
||||||
#define GENTPROT( ctype, ch, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT_GENTPROT_BASIC( packm_ref_12xk )
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROTCO
|
|
||||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_12xk_ri )
|
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_12xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,426 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "blis.h"
|
|
||||||
|
|
||||||
#undef GENTFUNC
|
|
||||||
#define GENTFUNC( ctype, ch, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
ctype* restrict beta_cast = beta; \
|
|
||||||
ctype* restrict alpha1 = a; \
|
|
||||||
ctype* restrict pi1 = p; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 +10*inca), *(pi1 +10) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 +11*inca), *(pi1 +11) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 +12*inca), *(pi1 +12) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 +13*inca), *(pi1 +13) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 +10*inca), *(pi1 +10) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 +11*inca), *(pi1 +11) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 +12*inca), *(pi1 +12) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 +13*inca), *(pi1 +13) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNC_BASIC0( packm_ref_14xk )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Presumably expands the GENTFUNCCO template currently in effect to emit the
   packm_ref_14xk_ri definitions. NOTE(review): which datatypes get
   instantiated is decided by INSERT_GENTFUNCCO_BASIC0, which is defined
   elsewhere -- confirm there. */
INSERT_GENTFUNCCO_BASIC0( packm_ref_14xk_ri )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
   Template for the reference packm micro-kernel that packs 14-row
   micro-panels using the "ri3" per-element macros.

   For each of the n columns, 14 complex elements are read from a (row
   stride inca, column stride lda; both are doubled below because a is
   walked through real-typed pointers) and handed to a PASTEMAC(ch,...ri3s)
   macro together with three real-typed destinations located at p, p + psp
   and p + 2*psp. psp and ldp are applied directly to ctype_r pointers, so
   they are in units of real elements.

   Macro selection:
   - eq1( *beta ) false, bli_is_conj( conja ) true:   scal2jri3s
   - eq1( *beta ) false, bli_is_conj( conja ) false:  scal2ri3s
   - eq1( *beta ) true,  bli_is_conj( conja ) true:   copyjri3s
   - eq1( *beta ) true,  bli_is_conj( conja ) false:  copyri3s
   The scal2 variants additionally receive the two components of beta.

   NOTE(review): the per-element macros are defined elsewhere; presumably
   the three destinations receive the real part, the imaginary part and
   their sum -- confirm against the macro definitions.
*/
#undef  GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname)( \
                           conj_t  conja, \
                           dim_t   n, \
                           void*   beta, \
                           void*   a, inc_t inca, inc_t lda, \
                           void*   p, inc_t psp, inc_t ldp \
                         ) \
{ \
	/* Number of rows packed per column by this kernel. */ \
	const dim_t       mnr      = 14; \
\
	/* a is traversed through real-typed pointers, so its strides double. */ \
	const inc_t       rs_src2  = 2 * inca; \
	const inc_t       cs_src2  = 2 * lda; \
\
	ctype*            scale    = beta; \
	ctype_r* restrict scale_r  = ( ctype_r* )beta; \
	ctype_r* restrict scale_i  = ( ctype_r* )beta + 1; \
	ctype_r* restrict src_r    = ( ctype_r* )a; \
	ctype_r* restrict src_i    = ( ctype_r* )a + 1; \
	ctype_r* restrict dst_r    = ( ctype_r* )p; \
	ctype_r* restrict dst_i    = ( ctype_r* )p +   psp; \
	ctype_r* restrict dst_ri   = ( ctype_r* )p + 2*psp; \
\
	dim_t             row; \
\
	if ( !( PASTEMAC(ch,eq1)( *scale ) ) ) \
	{ \
		/* General beta: scale each element while packing. */ \
		if ( bli_is_conj( conja ) ) \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,scal2jri3s)( *scale_r, *scale_i, src_r[ row*rs_src2 ], src_i[ row*rs_src2 ], dst_r[ row ], dst_i[ row ], dst_ri[ row ] ); \
				} \
\
				src_r  += cs_src2; \
				src_i  += cs_src2; \
				dst_r  += ldp; \
				dst_i  += ldp; \
				dst_ri += ldp; \
				--n; \
			} \
		} \
		else \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,scal2ri3s)( *scale_r, *scale_i, src_r[ row*rs_src2 ], src_i[ row*rs_src2 ], dst_r[ row ], dst_i[ row ], dst_ri[ row ] ); \
				} \
\
				src_r  += cs_src2; \
				src_i  += cs_src2; \
				dst_r  += ldp; \
				dst_i  += ldp; \
				dst_ri += ldp; \
				--n; \
			} \
		} \
	} \
	else \
	{ \
		/* eq1( *beta ) holds: pack without scaling. */ \
		if ( bli_is_conj( conja ) ) \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,copyjri3s)( src_r[ row*rs_src2 ], src_i[ row*rs_src2 ], dst_r[ row ], dst_i[ row ], dst_ri[ row ] ); \
				} \
\
				src_r  += cs_src2; \
				src_i  += cs_src2; \
				dst_r  += ldp; \
				dst_i  += ldp; \
				dst_ri += ldp; \
				--n; \
			} \
		} \
		else \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,copyri3s)( src_r[ row*rs_src2 ], src_i[ row*rs_src2 ], dst_r[ row ], dst_i[ row ], dst_ri[ row ] ); \
				} \
\
				src_r  += cs_src2; \
				src_i  += cs_src2; \
				dst_r  += ldp; \
				dst_i  += ldp; \
				dst_ri += ldp; \
				--n; \
			} \
		} \
	} \
}
|
|
||||||
|
|
||||||
/* Presumably expands the GENTFUNCCO template currently in effect to emit the
   packm_ref_14xk_ri3 definitions. NOTE(review): which datatypes get
   instantiated is decided by INSERT_GENTFUNCCO_BASIC0, which is defined
   elsewhere -- confirm there. */
INSERT_GENTFUNCCO_BASIC0( packm_ref_14xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
   Prototype template for a packm reference micro-kernel taking a single
   destination buffer p with leading dimension ldp (no psp argument).
   The function name is formed by PASTEMAC(ch,varname).
*/
#undef  GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname)( \
                           conj_t  conja, \
                           dim_t   n, \
                           void*   beta, \
                           void*   a, \
                           inc_t   inca, \
                           inc_t   lda, \
                           void*   p, \
                           inc_t   ldp \
                         );
|
|
||||||
|
|
||||||
/* Presumably expands the GENTPROT template currently in effect to declare
   the packm_ref_14xk prototypes. NOTE(review): INSERT_GENTPROT_BASIC is
   defined elsewhere -- confirm which datatypes it covers. */
INSERT_GENTPROT_BASIC( packm_ref_14xk )
|
|
||||||
|
|
||||||
|
|
||||||
/*
   Prototype template for a packm reference micro-kernel whose destination
   takes an extra stride argument psp in addition to ldp.
   The function name is formed by PASTEMAC(ch,varname).
*/
#undef  GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname)( \
                           conj_t  conja, \
                           dim_t   n, \
                           void*   beta, \
                           void*   a, \
                           inc_t   inca, \
                           inc_t   lda, \
                           void*   p, \
                           inc_t   psp, \
                           inc_t   ldp \
                         );
|
|
||||||
|
|
||||||
/* Presumably expands the GENTPROTCO template currently in effect to declare
   the packm_ref_14xk_ri prototypes. NOTE(review): INSERT_GENTPROTCO_BASIC
   is defined elsewhere -- confirm which datatypes it covers. */
INSERT_GENTPROTCO_BASIC( packm_ref_14xk_ri )
|
|
||||||
|
|
||||||
/* Presumably expands the GENTPROTCO template currently in effect to declare
   the packm_ref_14xk_ri3 prototypes. NOTE(review): INSERT_GENTPROTCO_BASIC
   is defined elsewhere -- confirm which datatypes it covers. */
INSERT_GENTPROTCO_BASIC( packm_ref_14xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,450 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "blis.h"
|
|
||||||
|
|
||||||
/*
   Template for the reference packm micro-kernel that packs 16-row
   micro-panels.

   For each of the n columns, 16 elements are read from a with row stride
   inca and written to 16 consecutive elements of p; a then advances by lda
   and p by ldp (all in units of ctype elements).

   Macro selection:
   - eq1( *beta ) false, bli_is_conj( conja ) true:   scal2js
   - eq1( *beta ) false, bli_is_conj( conja ) false:  scal2s
   - eq1( *beta ) true,  bli_is_conj( conja ) true:   copyjs
   - eq1( *beta ) true,  bli_is_conj( conja ) false:  copys
   The scal2 variants additionally receive *beta.

   NOTE(review): the per-element macros are defined elsewhere; presumably
   the "j" variants conjugate the source element -- confirm against the
   macro definitions.
*/
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname)( \
                           conj_t  conja, \
                           dim_t   n, \
                           void*   beta, \
                           void*   a, inc_t inca, inc_t lda, \
                           void*   p,             inc_t ldp \
                         ) \
{ \
	/* Number of rows packed per column by this kernel. */ \
	const dim_t     mnr   = 16; \
\
	ctype* restrict scale = beta; \
	ctype* restrict src   = a; \
	ctype* restrict dst   = p; \
\
	dim_t           row; \
\
	if ( !( PASTEMAC(ch,eq1)( *scale ) ) ) \
	{ \
		/* General beta: scale each element while packing. */ \
		if ( bli_is_conj( conja ) ) \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,scal2js)( *scale, src[ row*inca ], dst[ row ] ); \
				} \
\
				src += lda; \
				dst += ldp; \
				--n; \
			} \
		} \
		else \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,scal2s)( *scale, src[ row*inca ], dst[ row ] ); \
				} \
\
				src += lda; \
				dst += ldp; \
				--n; \
			} \
		} \
	} \
	else \
	{ \
		/* eq1( *beta ) holds: pack without scaling. */ \
		if ( bli_is_conj( conja ) ) \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,copyjs)( src[ row*inca ], dst[ row ] ); \
				} \
\
				src += lda; \
				dst += ldp; \
				--n; \
			} \
		} \
		else \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,copys)( src[ row*inca ], dst[ row ] ); \
				} \
\
				src += lda; \
				dst += ldp; \
				--n; \
			} \
		} \
	} \
}
|
|
||||||
|
|
||||||
/* Presumably expands the GENTFUNC template currently in effect to emit the
   packm_ref_16xk definitions. NOTE(review): which datatypes get
   instantiated is decided by INSERT_GENTFUNC_BASIC0, which is defined
   elsewhere -- confirm there. */
INSERT_GENTFUNC_BASIC0( packm_ref_16xk )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
   Template for the reference packm micro-kernel that packs 16-row
   micro-panels using the "ri" per-element macros.

   For each of the n columns, 16 complex elements are read from a (row
   stride inca, column stride lda; both are doubled below because a is
   walked through real-typed pointers) and handed to a PASTEMAC(ch,...ris)
   macro together with two real-typed destinations located at p and
   p + psp. psp and ldp are applied directly to ctype_r pointers, so they
   are in units of real elements.

   Macro selection:
   - eq1( *beta ) false, bli_is_conj( conja ) true:   scal2jris
   - eq1( *beta ) false, bli_is_conj( conja ) false:  scal2ris
   - eq1( *beta ) true,  bli_is_conj( conja ) true:   copyjris
   - eq1( *beta ) true,  bli_is_conj( conja ) false:  copyris
   The scal2 variants additionally receive the two components of beta.

   NOTE(review): the per-element macros are defined elsewhere; presumably
   the two destinations receive the real and imaginary parts respectively
   -- confirm against the macro definitions.
*/
#undef  GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname)( \
                           conj_t  conja, \
                           dim_t   n, \
                           void*   beta, \
                           void*   a, inc_t inca, inc_t lda, \
                           void*   p, inc_t psp, inc_t ldp \
                         ) \
{ \
	/* Number of rows packed per column by this kernel. */ \
	const dim_t       mnr      = 16; \
\
	/* a is traversed through real-typed pointers, so its strides double. */ \
	const inc_t       rs_src2  = 2 * inca; \
	const inc_t       cs_src2  = 2 * lda; \
\
	ctype*            scale    = beta; \
	ctype_r* restrict scale_r  = ( ctype_r* )beta; \
	ctype_r* restrict scale_i  = ( ctype_r* )beta + 1; \
	ctype_r* restrict src_r    = ( ctype_r* )a; \
	ctype_r* restrict src_i    = ( ctype_r* )a + 1; \
	ctype_r* restrict dst_r    = ( ctype_r* )p; \
	ctype_r* restrict dst_i    = ( ctype_r* )p + psp; \
\
	dim_t             row; \
\
	if ( !( PASTEMAC(ch,eq1)( *scale ) ) ) \
	{ \
		/* General beta: scale each element while packing. */ \
		if ( bli_is_conj( conja ) ) \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,scal2jris)( *scale_r, *scale_i, src_r[ row*rs_src2 ], src_i[ row*rs_src2 ], dst_r[ row ], dst_i[ row ] ); \
				} \
\
				src_r += cs_src2; \
				src_i += cs_src2; \
				dst_r += ldp; \
				dst_i += ldp; \
				--n; \
			} \
		} \
		else \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,scal2ris)( *scale_r, *scale_i, src_r[ row*rs_src2 ], src_i[ row*rs_src2 ], dst_r[ row ], dst_i[ row ] ); \
				} \
\
				src_r += cs_src2; \
				src_i += cs_src2; \
				dst_r += ldp; \
				dst_i += ldp; \
				--n; \
			} \
		} \
	} \
	else \
	{ \
		/* eq1( *beta ) holds: pack without scaling. */ \
		if ( bli_is_conj( conja ) ) \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,copyjris)( src_r[ row*rs_src2 ], src_i[ row*rs_src2 ], dst_r[ row ], dst_i[ row ] ); \
				} \
\
				src_r += cs_src2; \
				src_i += cs_src2; \
				dst_r += ldp; \
				dst_i += ldp; \
				--n; \
			} \
		} \
		else \
		{ \
			while ( n != 0 ) \
			{ \
				for ( row = 0; row < mnr; ++row ) \
				{ \
					PASTEMAC(ch,copyris)( src_r[ row*rs_src2 ], src_i[ row*rs_src2 ], dst_r[ row ], dst_i[ row ] ); \
				} \
\
				src_r += cs_src2; \
				src_i += cs_src2; \
				dst_r += ldp; \
				dst_i += ldp; \
				--n; \
			} \
		} \
	} \
}
|
|
||||||
|
|
||||||
/* Presumably expands the GENTFUNCCO template currently in effect to emit the
   packm_ref_16xk_ri definitions. NOTE(review): which datatypes get
   instantiated is decided by INSERT_GENTFUNCCO_BASIC0, which is defined
   elsewhere -- confirm there. */
INSERT_GENTFUNCCO_BASIC0( packm_ref_16xk_ri )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_ri +14) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_ri +15) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_ri +14) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_ri +15) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_ri +14) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_ri +15) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_ri +14) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_ri +15) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_16xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#undef GENTPROT
|
|
||||||
#define GENTPROT( ctype, ch, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT_GENTPROT_BASIC( packm_ref_16xk )
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROTCO
|
|
||||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_16xk_ri )
|
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_16xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,282 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "blis.h"
|
|
||||||
|
|
||||||
#undef GENTFUNC
|
|
||||||
#define GENTFUNC( ctype, ch, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
ctype* restrict beta_cast = beta; \
|
|
||||||
ctype* restrict alpha1 = a; \
|
|
||||||
ctype* restrict pi1 = p; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNC_BASIC0( packm_ref_2xk )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_2xk_ri )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_2xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,306 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "blis.h"
|
|
||||||
|
|
||||||
#undef GENTFUNC
|
|
||||||
#define GENTFUNC( ctype, ch, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
ctype* restrict beta_cast = beta; \
|
|
||||||
ctype* restrict alpha1 = a; \
|
|
||||||
ctype* restrict pi1 = p; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNC_BASIC0( packm_ref_4xk )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_4xk_ri )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_4xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,330 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "blis.h"
|
|
||||||
|
|
||||||
#undef GENTFUNC
|
|
||||||
#define GENTFUNC( ctype, ch, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
ctype* restrict beta_cast = beta; \
|
|
||||||
ctype* restrict alpha1 = a; \
|
|
||||||
ctype* restrict pi1 = p; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNC_BASIC0( packm_ref_6xk )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_6xk_ri )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_6xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,354 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "blis.h"
|
|
||||||
|
|
||||||
#undef GENTFUNC
|
|
||||||
#define GENTFUNC( ctype, ch, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
ctype* restrict beta_cast = beta; \
|
|
||||||
ctype* restrict alpha1 = a; \
|
|
||||||
ctype* restrict pi1 = p; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1 += lda; \
|
|
||||||
pi1 += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNC_BASIC0( packm_ref_8xk )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_8xk_ri )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNCCO
|
|
||||||
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
) \
|
|
||||||
{ \
|
|
||||||
const inc_t inca2 = 2 * inca; \
|
|
||||||
const inc_t lda2 = 2 * lda; \
|
|
||||||
\
|
|
||||||
ctype* beta_cast = beta; \
|
|
||||||
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
|
||||||
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
|
||||||
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
|
||||||
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
|
||||||
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
|
||||||
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
|
||||||
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
|
||||||
\
|
|
||||||
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if ( bli_is_conj( conja ) ) \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
for ( ; n != 0; --n ) \
|
|
||||||
{ \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
|
||||||
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
|
||||||
\
|
|
||||||
alpha1_r += lda2; \
|
|
||||||
alpha1_i += lda2; \
|
|
||||||
pi1_r += ldp; \
|
|
||||||
pi1_i += ldp; \
|
|
||||||
pi1_ri += ldp; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
INSERT_GENTFUNCCO_BASIC0( packm_ref_8xk_ri3 )
|
|
||||||
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
BLIS
|
|
||||||
An object-based framework for developing high-performance BLAS-like
|
|
||||||
libraries.
|
|
||||||
|
|
||||||
Copyright (C) 2014, The University of Texas at Austin
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
- Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
- Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
- Neither the name of The University of Texas at Austin nor the names
|
|
||||||
of its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#undef GENTPROT
|
|
||||||
#define GENTPROT( ctype, ch, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT_GENTPROT_BASIC( packm_ref_8xk )
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROTCO
|
|
||||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_8xk_ri )
|
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_8xk_ri3 )
|
|
||||||
|
|
||||||
826
frame/1m/packm/ukernels/bli_packm_ref_cxk.c
Normal file
826
frame/1m/packm/ukernels/bli_packm_ref_cxk.c
Normal file
@@ -0,0 +1,826 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
BLIS
|
||||||
|
An object-based framework for developing high-performance BLAS-like
|
||||||
|
libraries.
|
||||||
|
|
||||||
|
Copyright (C) 2014, The University of Texas at Austin
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
- Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
- Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
- Neither the name of The University of Texas at Austin nor the names
|
||||||
|
of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "blis.h"
|
||||||
|
|
||||||
|
#undef GENTFUNC
|
||||||
|
#define GENTFUNC( ctype, ch, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
ctype* restrict beta_cast = beta; \
|
||||||
|
ctype* restrict alpha1 = a; \
|
||||||
|
ctype* restrict pi1 = p; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNC_BASIC0( packm_ref_2xk )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNC
|
||||||
|
#define GENTFUNC( ctype, ch, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
ctype* restrict beta_cast = beta; \
|
||||||
|
ctype* restrict alpha1 = a; \
|
||||||
|
ctype* restrict pi1 = p; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC2(ch,ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNC_BASIC0( packm_ref_4xk )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNC
|
||||||
|
#define GENTFUNC( ctype, ch, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
ctype* restrict beta_cast = beta; \
|
||||||
|
ctype* restrict alpha1 = a; \
|
||||||
|
ctype* restrict pi1 = p; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNC_BASIC0( packm_ref_6xk )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNC
|
||||||
|
#define GENTFUNC( ctype, ch, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
ctype* restrict beta_cast = beta; \
|
||||||
|
ctype* restrict alpha1 = a; \
|
||||||
|
ctype* restrict pi1 = p; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNC_BASIC0( packm_ref_8xk )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNC
|
||||||
|
#define GENTFUNC( ctype, ch, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
ctype* restrict beta_cast = beta; \
|
||||||
|
ctype* restrict alpha1 = a; \
|
||||||
|
ctype* restrict pi1 = p; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNC_BASIC0( packm_ref_10xk )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNC
|
||||||
|
#define GENTFUNC( ctype, ch, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
ctype* restrict beta_cast = beta; \
|
||||||
|
ctype* restrict alpha1 = a; \
|
||||||
|
ctype* restrict pi1 = p; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||||
|
PASTEMAC(ch,copyjs)( *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||||
|
PASTEMAC(ch,copys)( *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||||
|
PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
|
||||||
|
PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
|
||||||
|
\
|
||||||
|
alpha1 += lda; \
|
||||||
|
pi1 += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNC_BASIC0( packm_ref_12xk )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
   Template for the reference packm micro-kernel that handles 14 elements
   per iteration.

   For each of n iterations the generated function reads the 14 elements
   a + k*inca (k = 0..13) and stores them to the 14 consecutive slots
   p + k, then advances the source by lda and the destination by ldp.

   Parameters of the generated function:
     conja        - when bli_is_conj( conja ) holds, the conjugating
                    variants (copyjs / scal2js) are used.
     n            - iteration count; the loop runs until n reaches zero.
     beta         - pointer to a ctype scalar. If eq1 accepts it, elements
                    go through copys/copyjs; otherwise through
                    scal2s/scal2js together with *beta.
     a, inca, lda - source pointer, stride between the 14 elements read in
                    one iteration, and stride between iterations.
     p, ldp       - destination pointer and stride between iterations.

   NOTE(review): eq1, copys, copyjs, scal2s and scal2js are defined
   elsewhere; presumably they test for one and compute y := x,
   y := conj(x), y := beta*x and y := beta*conj(x) - confirm.
*/
#undef GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname)( \
                           conj_t conja, \
                           dim_t n, \
                           void* beta, \
                           void* a, inc_t inca, inc_t lda, \
                           void* p, inc_t ldp \
                         ) \
{ \
	ctype* restrict beta_cast = beta; \
	ctype* restrict alpha1 = a; \
	ctype* restrict pi1 = p; \
\
	/* eq1 accepted beta: use the copy variants, beta is not applied. */ \
	if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 +10*inca), *(pi1 +10) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 +11*inca), *(pi1 +11) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 +12*inca), *(pi1 +12) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 +13*inca), *(pi1 +13) ); \
\
				alpha1 += lda; \
				pi1 += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
				PASTEMAC(ch,copys)( *(alpha1 +10*inca), *(pi1 +10) ); \
				PASTEMAC(ch,copys)( *(alpha1 +11*inca), *(pi1 +11) ); \
				PASTEMAC(ch,copys)( *(alpha1 +12*inca), *(pi1 +12) ); \
				PASTEMAC(ch,copys)( *(alpha1 +13*inca), *(pi1 +13) ); \
\
				alpha1 += lda; \
				pi1 += ldp; \
			} \
		} \
	} \
	/* Otherwise every element is passed through scal2s/scal2js with beta. */ \
	else \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
\
				alpha1 += lda; \
				pi1 += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
\
				alpha1 += lda; \
				pi1 += ldp; \
			} \
		} \
	} \
}
|
||||||
|
|
||||||
|
/* Instantiate the GENTFUNC template defined above under the base name
   packm_ref_14xk. NOTE(review): INSERT_GENTFUNC_BASIC0 is not defined in
   the visible source; presumably it expands GENTFUNC once per basic
   datatype prefix - confirm. */
INSERT_GENTFUNC_BASIC0( packm_ref_14xk )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
   Template for the reference packm micro-kernel that handles 16 elements
   per iteration.

   For each of n iterations the generated function reads the 16 elements
   a + k*inca (k = 0..15) and stores them to the 16 consecutive slots
   p + k, then advances the source by lda and the destination by ldp.

   Parameters of the generated function:
     conja        - when bli_is_conj( conja ) holds, the conjugating
                    variants (copyjs / scal2js) are used.
     n            - iteration count; the loop runs until n reaches zero.
     beta         - pointer to a ctype scalar. If eq1 accepts it, elements
                    go through copys/copyjs; otherwise through
                    scal2s/scal2js together with *beta.
     a, inca, lda - source pointer, stride between the 16 elements read in
                    one iteration, and stride between iterations.
     p, ldp       - destination pointer and stride between iterations.

   NOTE(review): eq1, copys, copyjs, scal2s and scal2js are defined
   elsewhere; presumably they test for one and compute y := x,
   y := conj(x), y := beta*x and y := beta*conj(x) - confirm.
*/
#undef GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname)( \
                           conj_t conja, \
                           dim_t n, \
                           void* beta, \
                           void* a, inc_t inca, inc_t lda, \
                           void* p, inc_t ldp \
                         ) \
{ \
	ctype* restrict beta_cast = beta; \
	ctype* restrict alpha1 = a; \
	ctype* restrict pi1 = p; \
\
	/* eq1 accepted beta: use the copy variants, beta is not applied. */ \
	if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 +10*inca), *(pi1 +10) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 +11*inca), *(pi1 +11) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 +12*inca), *(pi1 +12) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 +13*inca), *(pi1 +13) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 +14*inca), *(pi1 +14) ); \
				PASTEMAC(ch,copyjs)( *(alpha1 +15*inca), *(pi1 +15) ); \
\
				alpha1 += lda; \
				pi1 += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \
				PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \
				PASTEMAC(ch,copys)( *(alpha1 +10*inca), *(pi1 +10) ); \
				PASTEMAC(ch,copys)( *(alpha1 +11*inca), *(pi1 +11) ); \
				PASTEMAC(ch,copys)( *(alpha1 +12*inca), *(pi1 +12) ); \
				PASTEMAC(ch,copys)( *(alpha1 +13*inca), *(pi1 +13) ); \
				PASTEMAC(ch,copys)( *(alpha1 +14*inca), *(pi1 +14) ); \
				PASTEMAC(ch,copys)( *(alpha1 +15*inca), *(pi1 +15) ); \
\
				alpha1 += lda; \
				pi1 += ldp; \
			} \
		} \
	} \
	/* Otherwise every element is passed through scal2s/scal2js with beta. */ \
	else \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
				PASTEMAC(ch,scal2js)( *beta_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
\
				alpha1 += lda; \
				pi1 += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +10*inca), *(pi1 +10) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +11*inca), *(pi1 +11) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +12*inca), *(pi1 +12) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +13*inca), *(pi1 +13) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +14*inca), *(pi1 +14) ); \
				PASTEMAC(ch,scal2s)( *beta_cast, *(alpha1 +15*inca), *(pi1 +15) ); \
\
				alpha1 += lda; \
				pi1 += ldp; \
			} \
		} \
	} \
}
|
||||||
|
|
||||||
|
/* Instantiate the GENTFUNC template defined above under the base name
   packm_ref_16xk. NOTE(review): INSERT_GENTFUNC_BASIC0 is not defined in
   the visible source; presumably it expands GENTFUNC once per basic
   datatype prefix - confirm. */
INSERT_GENTFUNC_BASIC0( packm_ref_16xk )
|
||||||
|
|
||||||
@@ -44,20 +44,11 @@ void PASTEMAC(ch,varname)( \
|
|||||||
);
|
);
|
||||||
|
|
||||||
INSERT_GENTPROT_BASIC( packm_ref_2xk )
|
INSERT_GENTPROT_BASIC( packm_ref_2xk )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_4xk )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_6xk )
|
||||||
#undef GENTPROTCO
|
INSERT_GENTPROT_BASIC( packm_ref_8xk )
|
||||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
|
INSERT_GENTPROT_BASIC( packm_ref_10xk )
|
||||||
\
|
INSERT_GENTPROT_BASIC( packm_ref_12xk )
|
||||||
void PASTEMAC(ch,varname)( \
|
INSERT_GENTPROT_BASIC( packm_ref_14xk )
|
||||||
conj_t conja, \
|
INSERT_GENTPROT_BASIC( packm_ref_16xk )
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t psp, inc_t ldp \
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_2xk_ri )
|
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_2xk_ri3 )
|
|
||||||
|
|
||||||
946
frame/1m/packm/ukernels/bli_packm_ref_cxk_ri.c
Normal file
946
frame/1m/packm/ukernels/bli_packm_ref_cxk_ri.c
Normal file
@@ -0,0 +1,946 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
BLIS
|
||||||
|
An object-based framework for developing high-performance BLAS-like
|
||||||
|
libraries.
|
||||||
|
|
||||||
|
Copyright (C) 2014, The University of Texas at Austin
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
- Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
- Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
- Neither the name of The University of Texas at Austin nor the names
|
||||||
|
of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "blis.h"
|
||||||
|
|
||||||
|
/*
   Template for the reference "ri" packm micro-kernel that handles 2
   elements per iteration, writing real and imaginary parts to separate
   regions of p.

   All data is addressed through ctype_r pointers. The source is read as
   pairs: alpha1_r starts at a and alpha1_i one ctype_r later, which is why
   the strides inca and lda are doubled (inca2, lda2). The destination is
   split: pi1_r starts at p and pi1_i starts psp ctype_r elements after it.

   For each of n iterations the generated function processes elements
   k = 0..1, reading (alpha1_r + k*inca2, alpha1_i + k*inca2) and writing
   (pi1_r + k, pi1_i + k), then advances both source pointers by lda2 and
   both destination pointers by ldp.

   Parameters of the generated function:
     conja        - when bli_is_conj( conja ) holds, the conjugating
                    variants (copyjris / scal2jris) are used.
     n            - iteration count; the loop runs until n reaches zero.
     beta         - pointer to a ctype scalar, also read as two ctype_r
                    values (beta_r, beta_i). If eq1 accepts it, elements go
                    through copyris/copyjris; otherwise through
                    scal2ris/scal2jris together with beta_r and beta_i.
     a, inca, lda - source pointer and strides, in units of ctype.
     p, psp, ldp  - destination pointer, offset from the first destination
                    region to the second, and stride between iterations,
                    all in units of ctype_r.

   NOTE(review): presumably a holds interleaved real/imaginary pairs and
   the *ris macros operate on separate real and imaginary operands; those
   macros are defined elsewhere - confirm.
*/
#undef GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname)( \
                           conj_t conja, \
                           dim_t n, \
                           void* beta, \
                           void* a, inc_t inca, inc_t lda, \
                           void* p, inc_t psp, inc_t ldp \
                         ) \
{ \
	/* Strides are doubled because a is walked with ctype_r pointers. */ \
	const inc_t inca2 = 2 * inca; \
	const inc_t lda2 = 2 * lda; \
\
	ctype* beta_cast = beta; \
	ctype_r* restrict beta_r = ( ctype_r* )beta; \
	ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
	ctype_r* restrict alpha1_r = ( ctype_r* )a; \
	ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
	ctype_r* restrict pi1_r = ( ctype_r* )p; \
	ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
\
	/* eq1 accepted beta: use the copy variants, beta is not applied. */ \
	if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
	} \
	/* Otherwise every element is passed through scal2ris/scal2jris with beta. */ \
	else \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
	} \
}
|
||||||
|
|
||||||
|
/* Instantiate the GENTFUNCCO template defined above under the base name
   packm_ref_2xk_ri. NOTE(review): INSERT_GENTFUNCCO_BASIC0 is not defined
   in the visible source; presumably it expands GENTFUNCCO once per complex
   datatype prefix - confirm. */
INSERT_GENTFUNCCO_BASIC0( packm_ref_2xk_ri )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
   Template for the reference "ri" packm micro-kernel that handles 4
   elements per iteration, writing real and imaginary parts to separate
   regions of p.

   All data is addressed through ctype_r pointers. The source is read as
   pairs: alpha1_r starts at a and alpha1_i one ctype_r later, which is why
   the strides inca and lda are doubled (inca2, lda2). The destination is
   split: pi1_r starts at p and pi1_i starts psp ctype_r elements after it.

   For each of n iterations the generated function processes elements
   k = 0..3, reading (alpha1_r + k*inca2, alpha1_i + k*inca2) and writing
   (pi1_r + k, pi1_i + k), then advances both source pointers by lda2 and
   both destination pointers by ldp.

   Parameters of the generated function:
     conja        - when bli_is_conj( conja ) holds, the conjugating
                    variants (copyjris / scal2jris) are used.
     n            - iteration count; the loop runs until n reaches zero.
     beta         - pointer to a ctype scalar, also read as two ctype_r
                    values (beta_r, beta_i). If eq1 accepts it, elements go
                    through copyris/copyjris; otherwise through
                    scal2ris/scal2jris together with beta_r and beta_i.
     a, inca, lda - source pointer and strides, in units of ctype.
     p, psp, ldp  - destination pointer, offset from the first destination
                    region to the second, and stride between iterations,
                    all in units of ctype_r.

   NOTE(review): presumably a holds interleaved real/imaginary pairs and
   the *ris macros operate on separate real and imaginary operands; those
   macros are defined elsewhere - confirm.
*/
#undef GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname)( \
                           conj_t conja, \
                           dim_t n, \
                           void* beta, \
                           void* a, inc_t inca, inc_t lda, \
                           void* p, inc_t psp, inc_t ldp \
                         ) \
{ \
	/* Strides are doubled because a is walked with ctype_r pointers. */ \
	const inc_t inca2 = 2 * inca; \
	const inc_t lda2 = 2 * lda; \
\
	ctype* beta_cast = beta; \
	ctype_r* restrict beta_r = ( ctype_r* )beta; \
	ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
	ctype_r* restrict alpha1_r = ( ctype_r* )a; \
	ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
	ctype_r* restrict pi1_r = ( ctype_r* )p; \
	ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
\
	/* eq1 accepted beta: use the copy variants, beta is not applied. */ \
	if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
	} \
	/* Otherwise every element is passed through scal2ris/scal2jris with beta. */ \
	else \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
	} \
}
|
||||||
|
|
||||||
|
/* Instantiate the GENTFUNCCO template defined above under the base name
   packm_ref_4xk_ri. NOTE(review): INSERT_GENTFUNCCO_BASIC0 is not defined
   in the visible source; presumably it expands GENTFUNCCO once per complex
   datatype prefix - confirm. */
INSERT_GENTFUNCCO_BASIC0( packm_ref_4xk_ri )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
   Template for the reference "ri" packm micro-kernel that handles 6
   elements per iteration, writing real and imaginary parts to separate
   regions of p.

   All data is addressed through ctype_r pointers. The source is read as
   pairs: alpha1_r starts at a and alpha1_i one ctype_r later, which is why
   the strides inca and lda are doubled (inca2, lda2). The destination is
   split: pi1_r starts at p and pi1_i starts psp ctype_r elements after it.

   For each of n iterations the generated function processes elements
   k = 0..5, reading (alpha1_r + k*inca2, alpha1_i + k*inca2) and writing
   (pi1_r + k, pi1_i + k), then advances both source pointers by lda2 and
   both destination pointers by ldp.

   Parameters of the generated function:
     conja        - when bli_is_conj( conja ) holds, the conjugating
                    variants (copyjris / scal2jris) are used.
     n            - iteration count; the loop runs until n reaches zero.
     beta         - pointer to a ctype scalar, also read as two ctype_r
                    values (beta_r, beta_i). If eq1 accepts it, elements go
                    through copyris/copyjris; otherwise through
                    scal2ris/scal2jris together with beta_r and beta_i.
     a, inca, lda - source pointer and strides, in units of ctype.
     p, psp, ldp  - destination pointer, offset from the first destination
                    region to the second, and stride between iterations,
                    all in units of ctype_r.

   NOTE(review): presumably a holds interleaved real/imaginary pairs and
   the *ris macros operate on separate real and imaginary operands; those
   macros are defined elsewhere - confirm.
*/
#undef GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname)( \
                           conj_t conja, \
                           dim_t n, \
                           void* beta, \
                           void* a, inc_t inca, inc_t lda, \
                           void* p, inc_t psp, inc_t ldp \
                         ) \
{ \
	/* Strides are doubled because a is walked with ctype_r pointers. */ \
	const inc_t inca2 = 2 * inca; \
	const inc_t lda2 = 2 * lda; \
\
	ctype* beta_cast = beta; \
	ctype_r* restrict beta_r = ( ctype_r* )beta; \
	ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
	ctype_r* restrict alpha1_r = ( ctype_r* )a; \
	ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
	ctype_r* restrict pi1_r = ( ctype_r* )p; \
	ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
\
	/* eq1 accepted beta: use the copy variants, beta is not applied. */ \
	if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
	} \
	/* Otherwise every element is passed through scal2ris/scal2jris with beta. */ \
	else \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
	} \
}
|
||||||
|
|
||||||
|
/* Instantiate the GENTFUNCCO template defined above under the base name
   packm_ref_6xk_ri. NOTE(review): INSERT_GENTFUNCCO_BASIC0 is not defined
   in the visible source; presumably it expands GENTFUNCCO once per complex
   datatype prefix - confirm. */
INSERT_GENTFUNCCO_BASIC0( packm_ref_6xk_ri )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
   Template for the reference "ri" packm micro-kernel that handles 8
   elements per iteration, writing real and imaginary parts to separate
   regions of p.

   All data is addressed through ctype_r pointers. The source is read as
   pairs: alpha1_r starts at a and alpha1_i one ctype_r later, which is why
   the strides inca and lda are doubled (inca2, lda2). The destination is
   split: pi1_r starts at p and pi1_i starts psp ctype_r elements after it.

   For each of n iterations the generated function processes elements
   k = 0..7, reading (alpha1_r + k*inca2, alpha1_i + k*inca2) and writing
   (pi1_r + k, pi1_i + k), then advances both source pointers by lda2 and
   both destination pointers by ldp.

   Parameters of the generated function:
     conja        - when bli_is_conj( conja ) holds, the conjugating
                    variants (copyjris / scal2jris) are used.
     n            - iteration count; the loop runs until n reaches zero.
     beta         - pointer to a ctype scalar, also read as two ctype_r
                    values (beta_r, beta_i). If eq1 accepts it, elements go
                    through copyris/copyjris; otherwise through
                    scal2ris/scal2jris together with beta_r and beta_i.
     a, inca, lda - source pointer and strides, in units of ctype.
     p, psp, ldp  - destination pointer, offset from the first destination
                    region to the second, and stride between iterations,
                    all in units of ctype_r.

   NOTE(review): presumably a holds interleaved real/imaginary pairs and
   the *ris macros operate on separate real and imaginary operands; those
   macros are defined elsewhere - confirm.
*/
#undef GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname)( \
                           conj_t conja, \
                           dim_t n, \
                           void* beta, \
                           void* a, inc_t inca, inc_t lda, \
                           void* p, inc_t psp, inc_t ldp \
                         ) \
{ \
	/* Strides are doubled because a is walked with ctype_r pointers. */ \
	const inc_t inca2 = 2 * inca; \
	const inc_t lda2 = 2 * lda; \
\
	ctype* beta_cast = beta; \
	ctype_r* restrict beta_r = ( ctype_r* )beta; \
	ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
	ctype_r* restrict alpha1_r = ( ctype_r* )a; \
	ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
	ctype_r* restrict pi1_r = ( ctype_r* )p; \
	ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
\
	/* eq1 accepted beta: use the copy variants, beta is not applied. */ \
	if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
				PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
				PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
	} \
	/* Otherwise every element is passed through scal2ris/scal2jris with beta. */ \
	else \
	{ \
		if ( bli_is_conj( conja ) ) \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
				PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
		else \
		{ \
			for ( ; n != 0; --n ) \
			{ \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
				PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
\
				alpha1_r += lda2; \
				alpha1_i += lda2; \
				pi1_r += ldp; \
				pi1_i += ldp; \
			} \
		} \
	} \
}
|
||||||
|
|
||||||
|
/* Instantiate the GENTFUNCCO template defined above under the base name
   packm_ref_8xk_ri. NOTE(review): INSERT_GENTFUNCCO_BASIC0 is not defined
   in the visible source; presumably it expands GENTFUNCCO once per complex
   datatype prefix - confirm. */
INSERT_GENTFUNCCO_BASIC0( packm_ref_8xk_ri )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_10xk_ri )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_12xk_ri )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_14xk_ri )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
|
||||||
|
PASTEMAC(ch,copyjris)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
|
||||||
|
PASTEMAC(ch,copyris)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
|
||||||
|
PASTEMAC(ch,scal2jris)( *beta_r, *beta_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \
|
||||||
|
PASTEMAC(ch,scal2ris)( *beta_r, *beta_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_16xk_ri )
|
||||||
|
|
||||||
@@ -35,20 +35,6 @@
|
|||||||
#undef GENTPROT
|
#undef GENTPROT
|
||||||
#define GENTPROT( ctype, ch, varname ) \
|
#define GENTPROT( ctype, ch, varname ) \
|
||||||
\
|
\
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT_GENTPROT_BASIC( packm_ref_4xk )
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROTCO
|
|
||||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
void PASTEMAC(ch,varname)( \
|
||||||
conj_t conja, \
|
conj_t conja, \
|
||||||
dim_t n, \
|
dim_t n, \
|
||||||
@@ -57,7 +43,12 @@ void PASTEMAC(ch,varname)( \
|
|||||||
void* p, inc_t psp, inc_t ldp \
|
void* p, inc_t psp, inc_t ldp \
|
||||||
);
|
);
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_4xk_ri )
|
INSERT_GENTPROT_BASIC( packm_ref_2xk_ri )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_4xk_ri )
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_4xk_ri3 )
|
INSERT_GENTPROT_BASIC( packm_ref_6xk_ri )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_8xk_ri )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_10xk_ri )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_12xk_ri )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_14xk_ri )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_16xk_ri )
|
||||||
|
|
||||||
986
frame/1m/packm/ukernels/bli_packm_ref_cxk_ri3.c
Normal file
986
frame/1m/packm/ukernels/bli_packm_ref_cxk_ri3.c
Normal file
@@ -0,0 +1,986 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
BLIS
|
||||||
|
An object-based framework for developing high-performance BLAS-like
|
||||||
|
libraries.
|
||||||
|
|
||||||
|
Copyright (C) 2014, The University of Texas at Austin
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
- Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
- Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
- Neither the name of The University of Texas at Austin nor the names
|
||||||
|
of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "blis.h"
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_2xk_ri3 )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_4xk_ri3 )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_6xk_ri3 )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_8xk_ri3 )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_10xk_ri3 )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_12xk_ri3 )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_14xk_ri3 )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNCCO
|
||||||
|
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname)( \
|
||||||
|
conj_t conja, \
|
||||||
|
dim_t n, \
|
||||||
|
void* beta, \
|
||||||
|
void* a, inc_t inca, inc_t lda, \
|
||||||
|
void* p, inc_t psp, inc_t ldp \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const inc_t inca2 = 2 * inca; \
|
||||||
|
const inc_t lda2 = 2 * lda; \
|
||||||
|
\
|
||||||
|
ctype* beta_cast = beta; \
|
||||||
|
ctype_r* restrict beta_r = ( ctype_r* )beta; \
|
||||||
|
ctype_r* restrict beta_i = ( ctype_r* )beta + 1; \
|
||||||
|
ctype_r* restrict alpha1_r = ( ctype_r* )a; \
|
||||||
|
ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \
|
||||||
|
ctype_r* restrict pi1_r = ( ctype_r* )p; \
|
||||||
|
ctype_r* restrict pi1_i = ( ctype_r* )p + psp; \
|
||||||
|
ctype_r* restrict pi1_ri = ( ctype_r* )p + 2*psp; \
|
||||||
|
\
|
||||||
|
if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_ri +14) ); \
|
||||||
|
PASTEMAC(ch,copyjri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_ri +15) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_ri +14) ); \
|
||||||
|
PASTEMAC(ch,copyri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_ri +15) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if ( bli_is_conj( conja ) ) \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_ri +14) ); \
|
||||||
|
PASTEMAC(ch,scal2jri3s)( *beta_r, *beta_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_ri +15) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( ; n != 0; --n ) \
|
||||||
|
{ \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_ri + 0) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_ri + 1) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_ri + 2) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_ri + 3) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_ri + 4) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_ri + 5) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_ri + 6) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_ri + 7) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_ri + 8) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_ri + 9) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_ri +10) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_ri +11) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_ri +12) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_ri +13) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_ri +14) ); \
|
||||||
|
PASTEMAC(ch,scal2ri3s)( *beta_r, *beta_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_ri +15) ); \
|
||||||
|
\
|
||||||
|
alpha1_r += lda2; \
|
||||||
|
alpha1_i += lda2; \
|
||||||
|
pi1_r += ldp; \
|
||||||
|
pi1_i += ldp; \
|
||||||
|
pi1_ri += ldp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCCO_BASIC0( packm_ref_16xk_ri3 )
|
||||||
|
|
||||||
@@ -35,20 +35,6 @@
|
|||||||
#undef GENTPROT
|
#undef GENTPROT
|
||||||
#define GENTPROT( ctype, ch, varname ) \
|
#define GENTPROT( ctype, ch, varname ) \
|
||||||
\
|
\
|
||||||
void PASTEMAC(ch,varname)( \
|
|
||||||
conj_t conja, \
|
|
||||||
dim_t n, \
|
|
||||||
void* beta, \
|
|
||||||
void* a, inc_t inca, inc_t lda, \
|
|
||||||
void* p, inc_t ldp \
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT_GENTPROT_BASIC( packm_ref_6xk )
|
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROTCO
|
|
||||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
|
|
||||||
\
|
|
||||||
void PASTEMAC(ch,varname)( \
|
void PASTEMAC(ch,varname)( \
|
||||||
conj_t conja, \
|
conj_t conja, \
|
||||||
dim_t n, \
|
dim_t n, \
|
||||||
@@ -57,7 +43,12 @@ void PASTEMAC(ch,varname)( \
|
|||||||
void* p, inc_t psp, inc_t ldp \
|
void* p, inc_t psp, inc_t ldp \
|
||||||
);
|
);
|
||||||
|
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_6xk_ri )
|
INSERT_GENTPROT_BASIC( packm_ref_2xk_ri3 )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_4xk_ri3 )
|
||||||
INSERT_GENTPROTCO_BASIC( packm_ref_6xk_ri3 )
|
INSERT_GENTPROT_BASIC( packm_ref_6xk_ri3 )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_8xk_ri3 )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_10xk_ri3 )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_12xk_ri3 )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_14xk_ri3 )
|
||||||
|
INSERT_GENTPROT_BASIC( packm_ref_16xk_ri3 )
|
||||||
|
|
||||||
Reference in New Issue
Block a user