This commit is contained in:
Tyler Smith
2014-03-10 15:16:21 -05:00
437 changed files with 28720 additions and 10061 deletions

View File

@@ -50,6 +50,12 @@
// Setters for the a_next, b_next, ps_a and ps_b fields of an auxinfo object.
// Every macro expands to a brace-enclosed block, and the auxinfo argument is
// accessed with the '.' operator, so it must be an lvalue (not a pointer).
#define bli_auxinfo_set_next_a( addr, auxinfo ) \
{ \
	(auxinfo).a_next = (addr); \
}
#define bli_auxinfo_set_next_b( addr, auxinfo ) \
{ \
	(auxinfo).b_next = (addr); \
}
// Sets a_next and b_next in one invocation; auxinfo is expanded once per
// field.
#define bli_auxinfo_set_next_ab( addr_a, addr_b, auxinfo ) \
{ \
	(auxinfo).a_next = (addr_a); \
	(auxinfo).b_next = (addr_b); \
}
#define bli_auxinfo_set_ps_a( val, auxinfo ) \
{ \
	(auxinfo).ps_a = (val); \
}
#define bli_auxinfo_set_ps_b( val, auxinfo ) \
{ \
	(auxinfo).ps_b = (val); \
}

View File

@@ -45,16 +45,7 @@
#define bli_dimag( x ) ( 0.0 )
#ifdef BLIS_ENABLE_C99_COMPLEX
#define bli_creal( x ) ( crealf(x) )
#define bli_cimag( x ) ( cimagf(x) )
#define bli_zreal( x ) ( creal(x) )
#define bli_zimag( x ) ( cimag(x) )
#else // ifndef BLIS_ENABLE_C99_COMPLEX
#ifndef BLIS_ENABLE_C99_COMPLEX
#define bli_creal( x ) ( (x).real )
@@ -63,6 +54,15 @@
#define bli_zimag( x ) ( (x).imag )
#else // ifdef BLIS_ENABLE_C99_COMPLEX
#define bli_creal( x ) ( crealf(x) )
#define bli_cimag( x ) ( cimagf(x) )
#define bli_zreal( x ) ( creal(x) )
#define bli_zimag( x ) ( cimag(x) )
#endif // BLIS_ENABLE_C99_COMPLEX

View File

@@ -194,6 +194,33 @@ GENTFUNCR( scomplex, float, c, s, tfuncname, varname ) \
GENTFUNCR( dcomplex, double, z, d, tfuncname, varname )
// -- Basic one-operand macro with complex domain only and real projection (with no auxiliary arguments) --
// Expands to two GENTFUNCCO invocations: one with ( scomplex, float, c, s )
// and one with ( dcomplex, double, z, d ), each followed by tfuncname.
// NOTE(review): GENTFUNCCO is not defined in this excerpt; presumably the
// file that expands this macro defines it first -- confirm.
#define INSERT_GENTFUNCCO_BASIC0( tfuncname ) \
\
GENTFUNCCO( scomplex, float, c, s, tfuncname ) \
GENTFUNCCO( dcomplex, double, z, d, tfuncname )
// -- Basic one-operand macro with complex domain only and real projection --
// Expands to two GENTFUNCCO invocations: one with ( scomplex, float, c, s )
// and one with ( dcomplex, double, z, d ). Both tfuncname and the single
// auxiliary argument varname are forwarded unchanged to each invocation.
// NOTE(review): GENTFUNCCO is not defined in this excerpt; presumably the
// file that expands this macro defines it first -- confirm.
#define INSERT_GENTFUNCCO_BASIC( tfuncname, varname ) \
\
GENTFUNCCO( scomplex, float, c, s, tfuncname, varname ) \
GENTFUNCCO( dcomplex, double, z, d, tfuncname, varname )
// -- Basic one-operand macro with complex domain only and real projection (with two auxiliary arguments) --
// Expands to two GENTFUNCCO invocations: one with ( scomplex, float, c, s )
// and one with ( dcomplex, double, z, d ). tfuncname and both auxiliary
// arguments (varname1, varname2) are forwarded unchanged to each invocation.
// NOTE(review): GENTFUNCCO is not defined in this excerpt; presumably the
// file that expands this macro defines it first -- confirm.
#define INSERT_GENTFUNCCO_BASIC2( tfuncname, varname1, varname2 ) \
\
GENTFUNCCO( scomplex, float, c, s, tfuncname, varname1, varname2 ) \
GENTFUNCCO( dcomplex, double, z, d, tfuncname, varname1, varname2 )
// -- Basic one-operand with real and integer projections --

View File

@@ -171,6 +171,15 @@ GENTPROTR( scomplex, float, c, s, funcname ) \
GENTPROTR( dcomplex, double, z, d, funcname )
// -- Basic one-operand macro with complex domain only and real projection --
// Expands to two GENTPROTCO invocations: one with ( scomplex, float, c, s )
// and one with ( dcomplex, double, z, d ), each followed by funcname.
// NOTE(review): GENTPROTCO is not defined in this excerpt; presumably the
// file that expands this macro defines it first -- confirm.
#define INSERT_GENTPROTCO_BASIC( funcname ) \
\
GENTPROTCO( scomplex, float, c, s, funcname ) \
GENTPROTCO( dcomplex, double, z, d, funcname )
// -- Basic one-operand with real and integer projections --

View File

@@ -0,0 +1,413 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_KERNEL_3M_MACRO_DEFS_H
#define BLIS_KERNEL_3M_MACRO_DEFS_H
// -- Define datatype-agnostic base 3m kernel names ----------------------------
//
// Level-3 3m
//
// Level-3 3m micro-kernel names. Each macro is given its matching _REF name
// only if it has not been defined earlier, so an earlier definition always
// takes precedence over the fallback installed here.

// gemm3m micro-kernels
#if !defined( BLIS_CGEMM3M_UKERNEL )
  #define BLIS_CGEMM3M_UKERNEL BLIS_CGEMM3M_UKERNEL_REF
#endif
#if !defined( BLIS_ZGEMM3M_UKERNEL )
  #define BLIS_ZGEMM3M_UKERNEL BLIS_ZGEMM3M_UKERNEL_REF
#endif

// gemmtrsm3m_l micro-kernels
#if !defined( BLIS_CGEMMTRSM3M_L_UKERNEL )
  #define BLIS_CGEMMTRSM3M_L_UKERNEL BLIS_CGEMMTRSM3M_L_UKERNEL_REF
#endif
#if !defined( BLIS_ZGEMMTRSM3M_L_UKERNEL )
  #define BLIS_ZGEMMTRSM3M_L_UKERNEL BLIS_ZGEMMTRSM3M_L_UKERNEL_REF
#endif

// gemmtrsm3m_u micro-kernels
#if !defined( BLIS_CGEMMTRSM3M_U_UKERNEL )
  #define BLIS_CGEMMTRSM3M_U_UKERNEL BLIS_CGEMMTRSM3M_U_UKERNEL_REF
#endif
#if !defined( BLIS_ZGEMMTRSM3M_U_UKERNEL )
  #define BLIS_ZGEMMTRSM3M_U_UKERNEL BLIS_ZGEMMTRSM3M_U_UKERNEL_REF
#endif

// trsm3m_l micro-kernels
#if !defined( BLIS_CTRSM3M_L_UKERNEL )
  #define BLIS_CTRSM3M_L_UKERNEL BLIS_CTRSM3M_L_UKERNEL_REF
#endif
#if !defined( BLIS_ZTRSM3M_L_UKERNEL )
  #define BLIS_ZTRSM3M_L_UKERNEL BLIS_ZTRSM3M_L_UKERNEL_REF
#endif

// trsm3m_u micro-kernels
#if !defined( BLIS_CTRSM3M_U_UKERNEL )
  #define BLIS_CTRSM3M_U_UKERNEL BLIS_CTRSM3M_U_UKERNEL_REF
#endif
#if !defined( BLIS_ZTRSM3M_U_UKERNEL )
  #define BLIS_ZTRSM3M_U_UKERNEL BLIS_ZTRSM3M_U_UKERNEL_REF
#endif
//
// Level-1m
//
// packm_<N>xk_ri3 kernel names for N = 2, 4, ..., 16 and the four datatype
// prefixes S, D, C, Z. Each macro is given its matching _REF name only if it
// has not been defined earlier.

// packm_2xk_ri3 kernels
#if !defined( BLIS_SPACKM_2XK_RI3_KERNEL )
  #define BLIS_SPACKM_2XK_RI3_KERNEL BLIS_SPACKM_2XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_2XK_RI3_KERNEL )
  #define BLIS_DPACKM_2XK_RI3_KERNEL BLIS_DPACKM_2XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_2XK_RI3_KERNEL )
  #define BLIS_CPACKM_2XK_RI3_KERNEL BLIS_CPACKM_2XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_2XK_RI3_KERNEL )
  #define BLIS_ZPACKM_2XK_RI3_KERNEL BLIS_ZPACKM_2XK_RI3_KERNEL_REF
#endif

// packm_4xk_ri3 kernels
#if !defined( BLIS_SPACKM_4XK_RI3_KERNEL )
  #define BLIS_SPACKM_4XK_RI3_KERNEL BLIS_SPACKM_4XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_4XK_RI3_KERNEL )
  #define BLIS_DPACKM_4XK_RI3_KERNEL BLIS_DPACKM_4XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_4XK_RI3_KERNEL )
  #define BLIS_CPACKM_4XK_RI3_KERNEL BLIS_CPACKM_4XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_4XK_RI3_KERNEL )
  #define BLIS_ZPACKM_4XK_RI3_KERNEL BLIS_ZPACKM_4XK_RI3_KERNEL_REF
#endif

// packm_6xk_ri3 kernels
#if !defined( BLIS_SPACKM_6XK_RI3_KERNEL )
  #define BLIS_SPACKM_6XK_RI3_KERNEL BLIS_SPACKM_6XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_6XK_RI3_KERNEL )
  #define BLIS_DPACKM_6XK_RI3_KERNEL BLIS_DPACKM_6XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_6XK_RI3_KERNEL )
  #define BLIS_CPACKM_6XK_RI3_KERNEL BLIS_CPACKM_6XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_6XK_RI3_KERNEL )
  #define BLIS_ZPACKM_6XK_RI3_KERNEL BLIS_ZPACKM_6XK_RI3_KERNEL_REF
#endif

// packm_8xk_ri3 kernels
#if !defined( BLIS_SPACKM_8XK_RI3_KERNEL )
  #define BLIS_SPACKM_8XK_RI3_KERNEL BLIS_SPACKM_8XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_8XK_RI3_KERNEL )
  #define BLIS_DPACKM_8XK_RI3_KERNEL BLIS_DPACKM_8XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_8XK_RI3_KERNEL )
  #define BLIS_CPACKM_8XK_RI3_KERNEL BLIS_CPACKM_8XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_8XK_RI3_KERNEL )
  #define BLIS_ZPACKM_8XK_RI3_KERNEL BLIS_ZPACKM_8XK_RI3_KERNEL_REF
#endif

// packm_10xk_ri3 kernels
#if !defined( BLIS_SPACKM_10XK_RI3_KERNEL )
  #define BLIS_SPACKM_10XK_RI3_KERNEL BLIS_SPACKM_10XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_10XK_RI3_KERNEL )
  #define BLIS_DPACKM_10XK_RI3_KERNEL BLIS_DPACKM_10XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_10XK_RI3_KERNEL )
  #define BLIS_CPACKM_10XK_RI3_KERNEL BLIS_CPACKM_10XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_10XK_RI3_KERNEL )
  #define BLIS_ZPACKM_10XK_RI3_KERNEL BLIS_ZPACKM_10XK_RI3_KERNEL_REF
#endif

// packm_12xk_ri3 kernels
#if !defined( BLIS_SPACKM_12XK_RI3_KERNEL )
  #define BLIS_SPACKM_12XK_RI3_KERNEL BLIS_SPACKM_12XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_12XK_RI3_KERNEL )
  #define BLIS_DPACKM_12XK_RI3_KERNEL BLIS_DPACKM_12XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_12XK_RI3_KERNEL )
  #define BLIS_CPACKM_12XK_RI3_KERNEL BLIS_CPACKM_12XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_12XK_RI3_KERNEL )
  #define BLIS_ZPACKM_12XK_RI3_KERNEL BLIS_ZPACKM_12XK_RI3_KERNEL_REF
#endif

// packm_14xk_ri3 kernels
#if !defined( BLIS_SPACKM_14XK_RI3_KERNEL )
  #define BLIS_SPACKM_14XK_RI3_KERNEL BLIS_SPACKM_14XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_14XK_RI3_KERNEL )
  #define BLIS_DPACKM_14XK_RI3_KERNEL BLIS_DPACKM_14XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_14XK_RI3_KERNEL )
  #define BLIS_CPACKM_14XK_RI3_KERNEL BLIS_CPACKM_14XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_14XK_RI3_KERNEL )
  #define BLIS_ZPACKM_14XK_RI3_KERNEL BLIS_ZPACKM_14XK_RI3_KERNEL_REF
#endif

// packm_16xk_ri3 kernels
#if !defined( BLIS_SPACKM_16XK_RI3_KERNEL )
  #define BLIS_SPACKM_16XK_RI3_KERNEL BLIS_SPACKM_16XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_16XK_RI3_KERNEL )
  #define BLIS_DPACKM_16XK_RI3_KERNEL BLIS_DPACKM_16XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_16XK_RI3_KERNEL )
  #define BLIS_CPACKM_16XK_RI3_KERNEL BLIS_CPACKM_16XK_RI3_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_16XK_RI3_KERNEL )
  #define BLIS_ZPACKM_16XK_RI3_KERNEL BLIS_ZPACKM_16XK_RI3_KERNEL_REF
#endif
// -- Define default 3m-specific blocksize macros ------------------------------
// Define complex 3m register blocksizes in terms of blocksizes used for
// real kernels.
// The _C macros alias the corresponding real _S macros and the _Z macros
// alias the corresponding real _D macros. These definitions are
// unconditional: there is no #ifndef guard around them.
// 3m register blocksizes
#define BLIS_DEFAULT_3M_MR_C BLIS_DEFAULT_MR_S
#define BLIS_DEFAULT_3M_KR_C BLIS_DEFAULT_KR_S
#define BLIS_DEFAULT_3M_NR_C BLIS_DEFAULT_NR_S
#define BLIS_DEFAULT_3M_MR_Z BLIS_DEFAULT_MR_D
#define BLIS_DEFAULT_3M_KR_Z BLIS_DEFAULT_KR_D
#define BLIS_DEFAULT_3M_NR_Z BLIS_DEFAULT_NR_D
// 3m register blocksize extensions
// MR and NR extensions alias the real-domain extension macros; the KR
// extensions are fixed at 0 rather than aliasing a real-domain macro.
#define BLIS_EXTEND_3M_MR_C BLIS_EXTEND_MR_S
#define BLIS_EXTEND_3M_KR_C 0
#define BLIS_EXTEND_3M_NR_C BLIS_EXTEND_NR_S
#define BLIS_EXTEND_3M_MR_Z BLIS_EXTEND_MR_D
#define BLIS_EXTEND_3M_KR_Z 0
#define BLIS_EXTEND_3M_NR_Z BLIS_EXTEND_NR_D
// 3m cache blocksizes
// Unless a value has already been defined, each complex 3m cache blocksize is
// derived from the real-domain blocksize of the matching precision (_C from
// _S, _Z from _D): MC and NC are divided by 1 and KC is divided by 2.
#if !defined( BLIS_DEFAULT_3M_MC_C )
  #define BLIS_DEFAULT_3M_MC_C ((BLIS_DEFAULT_MC_S)/1)
#endif
#if !defined( BLIS_DEFAULT_3M_KC_C )
  #define BLIS_DEFAULT_3M_KC_C ((BLIS_DEFAULT_KC_S)/2)
#endif
#if !defined( BLIS_DEFAULT_3M_NC_C )
  #define BLIS_DEFAULT_3M_NC_C ((BLIS_DEFAULT_NC_S)/1)
#endif

#if !defined( BLIS_DEFAULT_3M_MC_Z )
  #define BLIS_DEFAULT_3M_MC_Z ((BLIS_DEFAULT_MC_D)/1)
#endif
#if !defined( BLIS_DEFAULT_3M_KC_Z )
  #define BLIS_DEFAULT_3M_KC_Z ((BLIS_DEFAULT_KC_D)/2)
#endif
#if !defined( BLIS_DEFAULT_3M_NC_Z )
  #define BLIS_DEFAULT_3M_NC_Z ((BLIS_DEFAULT_NC_D)/1)
#endif
// 3m cache blocksize extensions
// Every extension falls back to 0 when it has not been defined earlier.
#if !defined( BLIS_EXTEND_3M_MC_C )
  #define BLIS_EXTEND_3M_MC_C 0
#endif
#if !defined( BLIS_EXTEND_3M_KC_C )
  #define BLIS_EXTEND_3M_KC_C 0
#endif
#if !defined( BLIS_EXTEND_3M_NC_C )
  #define BLIS_EXTEND_3M_NC_C 0
#endif

#if !defined( BLIS_EXTEND_3M_MC_Z )
  #define BLIS_EXTEND_3M_MC_Z 0
#endif
#if !defined( BLIS_EXTEND_3M_KC_Z )
  #define BLIS_EXTEND_3M_KC_Z 0
#endif
#if !defined( BLIS_EXTEND_3M_NC_Z )
  #define BLIS_EXTEND_3M_NC_Z 0
#endif
// -- Kernel blocksize checks --------------------------------------------------
// Verify that cache blocksizes are whole multiples of register blocksizes.
// Specifically, verify that:
// - MC is a whole multiple of MR *AND* NR.
// - NC is a whole multiple of NR *AND* MR.
// - KC is a whole multiple of KR *AND* both MR, NR.
// These constraints are enforced because it makes it easier to handle diagonals
// in the macro-kernel implementations.
//
// MC must be a whole multiple of MR and NR.
//
// Each condition below tests the _C and the _Z variant together, so a
// non-zero remainder in either one raises the same #error. Only the
// BLIS_DEFAULT_3M_* values take part; the BLIS_EXTEND_3M_* values are not
// referenced by any of these checks.
#if ( \
( BLIS_DEFAULT_3M_MC_C % BLIS_DEFAULT_3M_MR_C != 0 ) || \
( BLIS_DEFAULT_3M_MC_Z % BLIS_DEFAULT_3M_MR_Z != 0 ) \
)
#error "MC (3m) must be multiple of MR for all datatypes."
#endif
#if ( \
( BLIS_DEFAULT_3M_MC_C % BLIS_DEFAULT_3M_NR_C != 0 ) || \
( BLIS_DEFAULT_3M_MC_Z % BLIS_DEFAULT_3M_NR_Z != 0 ) \
)
#error "MC (3m) must be multiple of NR for all datatypes."
#endif
//
// NC must be a whole multiple of NR and MR.
//
#if ( \
( BLIS_DEFAULT_3M_NC_C % BLIS_DEFAULT_3M_NR_C != 0 ) || \
( BLIS_DEFAULT_3M_NC_Z % BLIS_DEFAULT_3M_NR_Z != 0 ) \
)
#error "NC (3m) must be multiple of NR for all datatypes."
#endif
#if ( \
( BLIS_DEFAULT_3M_NC_C % BLIS_DEFAULT_3M_MR_C != 0 ) || \
( BLIS_DEFAULT_3M_NC_Z % BLIS_DEFAULT_3M_MR_Z != 0 ) \
)
#error "NC (3m) must be multiple of MR for all datatypes."
#endif
//
// KC must be a whole multiple of KR, MR, and NR.
//
// KR, MR and NR are each checked by a separate condition with its own
// #error message.
#if ( \
( BLIS_DEFAULT_3M_KC_C % BLIS_DEFAULT_3M_KR_C != 0 ) || \
( BLIS_DEFAULT_3M_KC_Z % BLIS_DEFAULT_3M_KR_Z != 0 ) \
)
#error "KC (3m) must be multiple of KR for all datatypes."
#endif
#if ( \
( BLIS_DEFAULT_3M_KC_C % BLIS_DEFAULT_3M_MR_C != 0 ) || \
( BLIS_DEFAULT_3M_KC_Z % BLIS_DEFAULT_3M_MR_Z != 0 ) \
)
#error "KC (3m) must be multiple of MR for all datatypes."
#endif
#if ( \
( BLIS_DEFAULT_3M_KC_C % BLIS_DEFAULT_3M_NR_C != 0 ) || \
( BLIS_DEFAULT_3M_KC_Z % BLIS_DEFAULT_3M_NR_Z != 0 ) \
)
#error "KC (3m) must be multiple of NR for all datatypes."
#endif
// -- Compute extended blocksizes ----------------------------------------------
//
// Compute maximum cache blocksizes.
//
// Each maximum is the default 3m cache blocksize plus its 3m extension.
#define BLIS_MAXIMUM_3M_MC_C ( BLIS_DEFAULT_3M_MC_C + BLIS_EXTEND_3M_MC_C )
#define BLIS_MAXIMUM_3M_KC_C ( BLIS_DEFAULT_3M_KC_C + BLIS_EXTEND_3M_KC_C )
#define BLIS_MAXIMUM_3M_NC_C ( BLIS_DEFAULT_3M_NC_C + BLIS_EXTEND_3M_NC_C )
#define BLIS_MAXIMUM_3M_MC_Z ( BLIS_DEFAULT_3M_MC_Z + BLIS_EXTEND_3M_MC_Z )
#define BLIS_MAXIMUM_3M_KC_Z ( BLIS_DEFAULT_3M_KC_Z + BLIS_EXTEND_3M_KC_Z )
#define BLIS_MAXIMUM_3M_NC_Z ( BLIS_DEFAULT_3M_NC_Z + BLIS_EXTEND_3M_NC_Z )
//
// Compute leading dimension blocksizes used when packing micro-panels.
//
// Each packing dimension is the default 3m register blocksize plus its 3m
// register blocksize extension.
#define BLIS_PACKDIM_3M_MR_C ( BLIS_DEFAULT_3M_MR_C + BLIS_EXTEND_3M_MR_C )
#define BLIS_PACKDIM_3M_KR_C ( BLIS_DEFAULT_3M_KR_C + BLIS_EXTEND_3M_KR_C )
#define BLIS_PACKDIM_3M_NR_C ( BLIS_DEFAULT_3M_NR_C + BLIS_EXTEND_3M_NR_C )
#define BLIS_PACKDIM_3M_MR_Z ( BLIS_DEFAULT_3M_MR_Z + BLIS_EXTEND_3M_MR_Z )
#define BLIS_PACKDIM_3M_KR_Z ( BLIS_DEFAULT_3M_KR_Z + BLIS_EXTEND_3M_KR_Z )
#define BLIS_PACKDIM_3M_NR_Z ( BLIS_DEFAULT_3M_NR_Z + BLIS_EXTEND_3M_NR_Z )
#endif

View File

@@ -0,0 +1,416 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_KERNEL_4M_MACRO_DEFS_H
#define BLIS_KERNEL_4M_MACRO_DEFS_H
// -- Construct 4m kernel function names ---------------------------------------
//
// Level-3 4m
//
// Level-3 4m micro-kernel names. Each macro is given its matching _REF name
// only if it has not been defined earlier, so an earlier definition always
// takes precedence over the fallback installed here.

// gemm4m micro-kernels
#if !defined( BLIS_CGEMM4M_UKERNEL )
  #define BLIS_CGEMM4M_UKERNEL BLIS_CGEMM4M_UKERNEL_REF
#endif
#if !defined( BLIS_ZGEMM4M_UKERNEL )
  #define BLIS_ZGEMM4M_UKERNEL BLIS_ZGEMM4M_UKERNEL_REF
#endif

// gemmtrsm4m_l micro-kernels
#if !defined( BLIS_CGEMMTRSM4M_L_UKERNEL )
  #define BLIS_CGEMMTRSM4M_L_UKERNEL BLIS_CGEMMTRSM4M_L_UKERNEL_REF
#endif
#if !defined( BLIS_ZGEMMTRSM4M_L_UKERNEL )
  #define BLIS_ZGEMMTRSM4M_L_UKERNEL BLIS_ZGEMMTRSM4M_L_UKERNEL_REF
#endif

// gemmtrsm4m_u micro-kernels
#if !defined( BLIS_CGEMMTRSM4M_U_UKERNEL )
  #define BLIS_CGEMMTRSM4M_U_UKERNEL BLIS_CGEMMTRSM4M_U_UKERNEL_REF
#endif
#if !defined( BLIS_ZGEMMTRSM4M_U_UKERNEL )
  #define BLIS_ZGEMMTRSM4M_U_UKERNEL BLIS_ZGEMMTRSM4M_U_UKERNEL_REF
#endif

// trsm4m_l micro-kernels
#if !defined( BLIS_CTRSM4M_L_UKERNEL )
  #define BLIS_CTRSM4M_L_UKERNEL BLIS_CTRSM4M_L_UKERNEL_REF
#endif
#if !defined( BLIS_ZTRSM4M_L_UKERNEL )
  #define BLIS_ZTRSM4M_L_UKERNEL BLIS_ZTRSM4M_L_UKERNEL_REF
#endif

// trsm4m_u micro-kernels
#if !defined( BLIS_CTRSM4M_U_UKERNEL )
  #define BLIS_CTRSM4M_U_UKERNEL BLIS_CTRSM4M_U_UKERNEL_REF
#endif
#if !defined( BLIS_ZTRSM4M_U_UKERNEL )
  #define BLIS_ZTRSM4M_U_UKERNEL BLIS_ZTRSM4M_U_UKERNEL_REF
#endif
//
// Level-1m
//
// packm_<N>xk_ri kernel names for N = 2, 4, ..., 16 and the four datatype
// prefixes S, D, C, Z. Each macro is given its matching _REF name only if it
// has not been defined earlier.

// packm_2xk_ri kernels
#if !defined( BLIS_SPACKM_2XK_RI_KERNEL )
  #define BLIS_SPACKM_2XK_RI_KERNEL BLIS_SPACKM_2XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_2XK_RI_KERNEL )
  #define BLIS_DPACKM_2XK_RI_KERNEL BLIS_DPACKM_2XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_2XK_RI_KERNEL )
  #define BLIS_CPACKM_2XK_RI_KERNEL BLIS_CPACKM_2XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_2XK_RI_KERNEL )
  #define BLIS_ZPACKM_2XK_RI_KERNEL BLIS_ZPACKM_2XK_RI_KERNEL_REF
#endif

// packm_4xk_ri kernels
#if !defined( BLIS_SPACKM_4XK_RI_KERNEL )
  #define BLIS_SPACKM_4XK_RI_KERNEL BLIS_SPACKM_4XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_4XK_RI_KERNEL )
  #define BLIS_DPACKM_4XK_RI_KERNEL BLIS_DPACKM_4XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_4XK_RI_KERNEL )
  #define BLIS_CPACKM_4XK_RI_KERNEL BLIS_CPACKM_4XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_4XK_RI_KERNEL )
  #define BLIS_ZPACKM_4XK_RI_KERNEL BLIS_ZPACKM_4XK_RI_KERNEL_REF
#endif

// packm_6xk_ri kernels
#if !defined( BLIS_SPACKM_6XK_RI_KERNEL )
  #define BLIS_SPACKM_6XK_RI_KERNEL BLIS_SPACKM_6XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_6XK_RI_KERNEL )
  #define BLIS_DPACKM_6XK_RI_KERNEL BLIS_DPACKM_6XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_6XK_RI_KERNEL )
  #define BLIS_CPACKM_6XK_RI_KERNEL BLIS_CPACKM_6XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_6XK_RI_KERNEL )
  #define BLIS_ZPACKM_6XK_RI_KERNEL BLIS_ZPACKM_6XK_RI_KERNEL_REF
#endif

// packm_8xk_ri kernels
#if !defined( BLIS_SPACKM_8XK_RI_KERNEL )
  #define BLIS_SPACKM_8XK_RI_KERNEL BLIS_SPACKM_8XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_8XK_RI_KERNEL )
  #define BLIS_DPACKM_8XK_RI_KERNEL BLIS_DPACKM_8XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_8XK_RI_KERNEL )
  #define BLIS_CPACKM_8XK_RI_KERNEL BLIS_CPACKM_8XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_8XK_RI_KERNEL )
  #define BLIS_ZPACKM_8XK_RI_KERNEL BLIS_ZPACKM_8XK_RI_KERNEL_REF
#endif

// packm_10xk_ri kernels
#if !defined( BLIS_SPACKM_10XK_RI_KERNEL )
  #define BLIS_SPACKM_10XK_RI_KERNEL BLIS_SPACKM_10XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_10XK_RI_KERNEL )
  #define BLIS_DPACKM_10XK_RI_KERNEL BLIS_DPACKM_10XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_10XK_RI_KERNEL )
  #define BLIS_CPACKM_10XK_RI_KERNEL BLIS_CPACKM_10XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_10XK_RI_KERNEL )
  #define BLIS_ZPACKM_10XK_RI_KERNEL BLIS_ZPACKM_10XK_RI_KERNEL_REF
#endif

// packm_12xk_ri kernels
#if !defined( BLIS_SPACKM_12XK_RI_KERNEL )
  #define BLIS_SPACKM_12XK_RI_KERNEL BLIS_SPACKM_12XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_12XK_RI_KERNEL )
  #define BLIS_DPACKM_12XK_RI_KERNEL BLIS_DPACKM_12XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_12XK_RI_KERNEL )
  #define BLIS_CPACKM_12XK_RI_KERNEL BLIS_CPACKM_12XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_12XK_RI_KERNEL )
  #define BLIS_ZPACKM_12XK_RI_KERNEL BLIS_ZPACKM_12XK_RI_KERNEL_REF
#endif

// packm_14xk_ri kernels
#if !defined( BLIS_SPACKM_14XK_RI_KERNEL )
  #define BLIS_SPACKM_14XK_RI_KERNEL BLIS_SPACKM_14XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_14XK_RI_KERNEL )
  #define BLIS_DPACKM_14XK_RI_KERNEL BLIS_DPACKM_14XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_14XK_RI_KERNEL )
  #define BLIS_CPACKM_14XK_RI_KERNEL BLIS_CPACKM_14XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_14XK_RI_KERNEL )
  #define BLIS_ZPACKM_14XK_RI_KERNEL BLIS_ZPACKM_14XK_RI_KERNEL_REF
#endif

// packm_16xk_ri kernels
#if !defined( BLIS_SPACKM_16XK_RI_KERNEL )
  #define BLIS_SPACKM_16XK_RI_KERNEL BLIS_SPACKM_16XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_DPACKM_16XK_RI_KERNEL )
  #define BLIS_DPACKM_16XK_RI_KERNEL BLIS_DPACKM_16XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_CPACKM_16XK_RI_KERNEL )
  #define BLIS_CPACKM_16XK_RI_KERNEL BLIS_CPACKM_16XK_RI_KERNEL_REF
#endif
#if !defined( BLIS_ZPACKM_16XK_RI_KERNEL )
  #define BLIS_ZPACKM_16XK_RI_KERNEL BLIS_ZPACKM_16XK_RI_KERNEL_REF
#endif
// -- Define default 4m-specific blocksize macros ------------------------------
// Define complex 4m register blocksizes in terms of blocksizes used for
// real kernels.
// The _C macros alias the corresponding real _S macros and the _Z macros
// alias the corresponding real _D macros. These definitions are
// unconditional: there is no #ifndef guard around them.
// 4m register blocksizes
#define BLIS_DEFAULT_4M_MR_C BLIS_DEFAULT_MR_S
#define BLIS_DEFAULT_4M_KR_C BLIS_DEFAULT_KR_S
#define BLIS_DEFAULT_4M_NR_C BLIS_DEFAULT_NR_S
#define BLIS_DEFAULT_4M_MR_Z BLIS_DEFAULT_MR_D
#define BLIS_DEFAULT_4M_KR_Z BLIS_DEFAULT_KR_D
#define BLIS_DEFAULT_4M_NR_Z BLIS_DEFAULT_NR_D
// 4m register blocksize extensions
// MR and NR extensions alias the real-domain extension macros; the KR
// extensions are fixed at 0 rather than aliasing a real-domain macro.
#define BLIS_EXTEND_4M_MR_C BLIS_EXTEND_MR_S
#define BLIS_EXTEND_4M_KR_C 0
#define BLIS_EXTEND_4M_NR_C BLIS_EXTEND_NR_S
#define BLIS_EXTEND_4M_MR_Z BLIS_EXTEND_MR_D
#define BLIS_EXTEND_4M_KR_Z 0
#define BLIS_EXTEND_4M_NR_Z BLIS_EXTEND_NR_D
// 4m cache blocksizes
// Unless a value has already been defined, each complex 4m cache blocksize is
// derived from the real-domain blocksize of the matching precision (_C from
// _S, _Z from _D): MC and NC are divided by 1 and KC is divided by 2.
#if !defined( BLIS_DEFAULT_4M_MC_C )
  #define BLIS_DEFAULT_4M_MC_C ((BLIS_DEFAULT_MC_S)/1)
#endif
#if !defined( BLIS_DEFAULT_4M_KC_C )
  #define BLIS_DEFAULT_4M_KC_C ((BLIS_DEFAULT_KC_S)/2)
#endif
#if !defined( BLIS_DEFAULT_4M_NC_C )
  #define BLIS_DEFAULT_4M_NC_C ((BLIS_DEFAULT_NC_S)/1)
#endif

#if !defined( BLIS_DEFAULT_4M_MC_Z )
  #define BLIS_DEFAULT_4M_MC_Z ((BLIS_DEFAULT_MC_D)/1)
#endif
#if !defined( BLIS_DEFAULT_4M_KC_Z )
  #define BLIS_DEFAULT_4M_KC_Z ((BLIS_DEFAULT_KC_D)/2)
#endif
#if !defined( BLIS_DEFAULT_4M_NC_Z )
  #define BLIS_DEFAULT_4M_NC_Z ((BLIS_DEFAULT_NC_D)/1)
#endif
// 4m cache blocksize extensions
// Every extension falls back to 0 when it has not been defined earlier.
#if !defined( BLIS_EXTEND_4M_MC_C )
  #define BLIS_EXTEND_4M_MC_C 0
#endif
#if !defined( BLIS_EXTEND_4M_KC_C )
  #define BLIS_EXTEND_4M_KC_C 0
#endif
#if !defined( BLIS_EXTEND_4M_NC_C )
  #define BLIS_EXTEND_4M_NC_C 0
#endif

#if !defined( BLIS_EXTEND_4M_MC_Z )
  #define BLIS_EXTEND_4M_MC_Z 0
#endif
#if !defined( BLIS_EXTEND_4M_KC_Z )
  #define BLIS_EXTEND_4M_KC_Z 0
#endif
#if !defined( BLIS_EXTEND_4M_NC_Z )
  #define BLIS_EXTEND_4M_NC_Z 0
#endif
// -- Kernel blocksize checks --------------------------------------------------
// Verify that cache blocksizes are whole multiples of register blocksizes.
// Specifically, verify that:
// - MC is a whole multiple of MR *AND* NR.
// - NC is a whole multiple of NR *AND* MR.
// - KC is a whole multiple of KR *AND* both MR, NR.
// These constraints are enforced because it makes it easier to handle diagonals
// in the macro-kernel implementations.
//
// MC must be a whole multiple of MR and NR.
//
// Each condition below tests the _C and the _Z variant together, so a
// non-zero remainder in either one raises the same #error. Only the
// BLIS_DEFAULT_4M_* values take part; the BLIS_EXTEND_4M_* values are not
// referenced by any of these checks.
#if ( \
( BLIS_DEFAULT_4M_MC_C % BLIS_DEFAULT_4M_MR_C != 0 ) || \
( BLIS_DEFAULT_4M_MC_Z % BLIS_DEFAULT_4M_MR_Z != 0 ) \
)
#error "MC (4m) must be multiple of MR for all datatypes."
#endif
#if ( \
( BLIS_DEFAULT_4M_MC_C % BLIS_DEFAULT_4M_NR_C != 0 ) || \
( BLIS_DEFAULT_4M_MC_Z % BLIS_DEFAULT_4M_NR_Z != 0 ) \
)
#error "MC (4m) must be multiple of NR for all datatypes."
#endif
//
// NC must be a whole multiple of NR and MR.
//
#if ( \
( BLIS_DEFAULT_4M_NC_C % BLIS_DEFAULT_4M_NR_C != 0 ) || \
( BLIS_DEFAULT_4M_NC_Z % BLIS_DEFAULT_4M_NR_Z != 0 ) \
)
#error "NC (4m) must be multiple of NR for all datatypes."
#endif
#if ( \
( BLIS_DEFAULT_4M_NC_C % BLIS_DEFAULT_4M_MR_C != 0 ) || \
( BLIS_DEFAULT_4M_NC_Z % BLIS_DEFAULT_4M_MR_Z != 0 ) \
)
#error "NC (4m) must be multiple of MR for all datatypes."
#endif
//
// KC must be a whole multiple of KR, MR, and NR.
//
// KR, MR and NR are each checked by a separate condition with its own
// #error message.
#if ( \
( BLIS_DEFAULT_4M_KC_C % BLIS_DEFAULT_4M_KR_C != 0 ) || \
( BLIS_DEFAULT_4M_KC_Z % BLIS_DEFAULT_4M_KR_Z != 0 ) \
)
#error "KC (4m) must be multiple of KR for all datatypes."
#endif
#if ( \
( BLIS_DEFAULT_4M_KC_C % BLIS_DEFAULT_4M_MR_C != 0 ) || \
( BLIS_DEFAULT_4M_KC_Z % BLIS_DEFAULT_4M_MR_Z != 0 ) \
)
#error "KC (4m) must be multiple of MR for all datatypes."
#endif
#if ( \
( BLIS_DEFAULT_4M_KC_C % BLIS_DEFAULT_4M_NR_C != 0 ) || \
( BLIS_DEFAULT_4M_KC_Z % BLIS_DEFAULT_4M_NR_Z != 0 ) \
)
#error "KC (4m) must be multiple of NR for all datatypes."
#endif
// -- Compute extended blocksizes ----------------------------------------------
//
// Compute maximum cache blocksizes.
//
// Each maximum is the default 4m cache blocksize plus its 4m extension.
#define BLIS_MAXIMUM_4M_MC_C ( BLIS_DEFAULT_4M_MC_C + BLIS_EXTEND_4M_MC_C )
#define BLIS_MAXIMUM_4M_KC_C ( BLIS_DEFAULT_4M_KC_C + BLIS_EXTEND_4M_KC_C )
#define BLIS_MAXIMUM_4M_NC_C ( BLIS_DEFAULT_4M_NC_C + BLIS_EXTEND_4M_NC_C )
#define BLIS_MAXIMUM_4M_MC_Z ( BLIS_DEFAULT_4M_MC_Z + BLIS_EXTEND_4M_MC_Z )
#define BLIS_MAXIMUM_4M_KC_Z ( BLIS_DEFAULT_4M_KC_Z + BLIS_EXTEND_4M_KC_Z )
#define BLIS_MAXIMUM_4M_NC_Z ( BLIS_DEFAULT_4M_NC_Z + BLIS_EXTEND_4M_NC_Z )
//
// Compute leading dimension blocksizes used when packing micro-panels.
//
// Each packing dimension is the default 4m register blocksize plus its 4m
// register blocksize extension.
#define BLIS_PACKDIM_4M_MR_C ( BLIS_DEFAULT_4M_MR_C + BLIS_EXTEND_4M_MR_C )
#define BLIS_PACKDIM_4M_KR_C ( BLIS_DEFAULT_4M_KR_C + BLIS_EXTEND_4M_KR_C )
#define BLIS_PACKDIM_4M_NR_C ( BLIS_DEFAULT_4M_NR_C + BLIS_EXTEND_4M_NR_C )
#define BLIS_PACKDIM_4M_MR_Z ( BLIS_DEFAULT_4M_MR_Z + BLIS_EXTEND_4M_MR_Z )
#define BLIS_PACKDIM_4M_KR_Z ( BLIS_DEFAULT_4M_KR_Z + BLIS_EXTEND_4M_KR_Z )
#define BLIS_PACKDIM_4M_NR_Z ( BLIS_DEFAULT_4M_NR_Z + BLIS_EXTEND_4M_NR_Z )
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -35,6 +35,238 @@
#ifndef BLIS_KERNEL_POST_MACRO_DEFS_H
#define BLIS_KERNEL_POST_MACRO_DEFS_H
/*
// -- Define PASTEMAC-friendly kernel function name macros ---------------------
//
// Level-3
//
// gemm micro-kernels
#define bli_sGEMM_UKERNEL BLIS_SGEMM_UKERNEL
#define bli_dGEMM_UKERNEL BLIS_DGEMM_UKERNEL
#define bli_cGEMM_UKERNEL BLIS_CGEMM_UKERNEL
#define bli_zGEMM_UKERNEL BLIS_ZGEMM_UKERNEL
// gemmtrsm_l micro-kernels
#define bli_sGEMMTRSM_L_UKERNEL BLIS_SGEMMTRSM_L_UKERNEL
#define bli_dGEMMTRSM_L_UKERNEL BLIS_DGEMMTRSM_L_UKERNEL
#define bli_cGEMMTRSM_L_UKERNEL BLIS_CGEMMTRSM_L_UKERNEL
#define bli_zGEMMTRSM_L_UKERNEL BLIS_ZGEMMTRSM_L_UKERNEL
// gemmtrsm_u micro-kernels
#define bli_sGEMMTRSM_U_UKERNEL BLIS_SGEMMTRSM_U_UKERNEL
#define bli_dGEMMTRSM_U_UKERNEL BLIS_DGEMMTRSM_U_UKERNEL
#define bli_cGEMMTRSM_U_UKERNEL BLIS_CGEMMTRSM_U_UKERNEL
#define bli_zGEMMTRSM_U_UKERNEL BLIS_ZGEMMTRSM_U_UKERNEL
// trsm_l micro-kernels
#define bli_sTRSM_L_UKERNEL BLIS_STRSM_L_UKERNEL
#define bli_dTRSM_L_UKERNEL BLIS_DTRSM_L_UKERNEL
#define bli_cTRSM_L_UKERNEL BLIS_CTRSM_L_UKERNEL
#define bli_zTRSM_L_UKERNEL BLIS_ZTRSM_L_UKERNEL
// trsm_u micro-kernels
#define bli_sTRSM_U_UKERNEL BLIS_STRSM_U_UKERNEL
#define bli_dTRSM_U_UKERNEL BLIS_DTRSM_U_UKERNEL
#define bli_cTRSM_U_UKERNEL BLIS_CTRSM_U_UKERNEL
#define bli_zTRSM_U_UKERNEL BLIS_ZTRSM_U_UKERNEL
//
// Level-3 4m
//
// gemm4m micro-kernels
#define bli_cGEMM4M_UKERNEL BLIS_CGEMM4M_UKERNEL
#define bli_zGEMM4M_UKERNEL BLIS_ZGEMM4M_UKERNEL
// gemmtrsm4m_l micro-kernels
#define bli_cGEMMTRSM4M_L_UKERNEL BLIS_CGEMMTRSM4M_L_UKERNEL
#define bli_zGEMMTRSM4M_L_UKERNEL BLIS_ZGEMMTRSM4M_L_UKERNEL
// gemmtrsm4m_u micro-kernels
#define bli_cGEMMTRSM4M_U_UKERNEL BLIS_CGEMMTRSM4M_U_UKERNEL
#define bli_zGEMMTRSM4M_U_UKERNEL BLIS_ZGEMMTRSM4M_U_UKERNEL
// trsm4m_l micro-kernels
#define bli_cTRSM4M_L_UKERNEL BLIS_CTRSM4M_L_UKERNEL
#define bli_zTRSM4M_L_UKERNEL BLIS_ZTRSM4M_L_UKERNEL
// trsm4m_u micro-kernels
#define bli_cTRSM4M_U_UKERNEL BLIS_CTRSM4M_U_UKERNEL
#define bli_zTRSM4M_U_UKERNEL BLIS_ZTRSM4M_U_UKERNEL
//
// Level-3 3m
//
// gemm3m micro-kernels
#define bli_cGEMM3M_UKERNEL BLIS_CGEMM3M_UKERNEL
#define bli_zGEMM3M_UKERNEL BLIS_ZGEMM3M_UKERNEL
// gemmtrsm3m_l micro-kernels
#define bli_cGEMMTRSM3M_L_UKERNEL BLIS_CGEMMTRSM3M_L_UKERNEL
#define bli_zGEMMTRSM3M_L_UKERNEL BLIS_ZGEMMTRSM3M_L_UKERNEL
// gemmtrsm3m_u micro-kernels
#define bli_cGEMMTRSM3M_U_UKERNEL BLIS_CGEMMTRSM3M_U_UKERNEL
#define bli_zGEMMTRSM3M_U_UKERNEL BLIS_ZGEMMTRSM3M_U_UKERNEL
// trsm3m_l micro-kernels
#define bli_cTRSM3M_L_UKERNEL BLIS_CTRSM3M_L_UKERNEL
#define bli_zTRSM3M_L_UKERNEL BLIS_ZTRSM3M_L_UKERNEL
// trsm3m_u micro-kernels
#define bli_cTRSM3M_U_UKERNEL BLIS_CTRSM3M_U_UKERNEL
#define bli_zTRSM3M_U_UKERNEL BLIS_ZTRSM3M_U_UKERNEL
//
// Level-1m
//
// NOTE: We don't need any PASTEMAC-friendly aliases to packm kernel
// macros because they are used directly in the initialization of the
// function pointer array, rather than via a templatizing wrapper macro.
//
// Level-1f
//
// axpy2v kernels
#define bli_sssAXPY2V_KERNEL BLIS_SAXPY2V_KERNEL
#define bli_dddAXPY2V_KERNEL BLIS_DAXPY2V_KERNEL
#define bli_cccAXPY2V_KERNEL BLIS_CAXPY2V_KERNEL
#define bli_zzzAXPY2V_KERNEL BLIS_ZAXPY2V_KERNEL
// dotaxpyv kernels
#define bli_sssDOTAXPYV_KERNEL BLIS_SDOTAXPYV_KERNEL
#define bli_dddDOTAXPYV_KERNEL BLIS_DDOTAXPYV_KERNEL
#define bli_cccDOTAXPYV_KERNEL BLIS_CDOTAXPYV_KERNEL
#define bli_zzzDOTAXPYV_KERNEL BLIS_ZDOTAXPYV_KERNEL
// axpyf kernels
#define bli_sssAXPYF_KERNEL BLIS_SAXPYF_KERNEL
#define bli_dddAXPYF_KERNEL BLIS_DAXPYF_KERNEL
#define bli_cccAXPYF_KERNEL BLIS_CAXPYF_KERNEL
#define bli_zzzAXPYF_KERNEL BLIS_ZAXPYF_KERNEL
// dotxf kernels
#define bli_sssDOTXF_KERNEL BLIS_SDOTXF_KERNEL
#define bli_dddDOTXF_KERNEL BLIS_DDOTXF_KERNEL
#define bli_cccDOTXF_KERNEL BLIS_CDOTXF_KERNEL
#define bli_zzzDOTXF_KERNEL BLIS_ZDOTXF_KERNEL
// dotxaxpyf kernels
#define bli_sssDOTXAXPYF_KERNEL BLIS_SDOTXAXPYF_KERNEL
#define bli_dddDOTXAXPYF_KERNEL BLIS_DDOTXAXPYF_KERNEL
#define bli_cccDOTXAXPYF_KERNEL BLIS_CDOTXAXPYF_KERNEL
#define bli_zzzDOTXAXPYF_KERNEL BLIS_ZDOTXAXPYF_KERNEL
//
// Level-1v
//
// addv kernels
#define bli_ssADDV_KERNEL BLIS_SADDV_KERNEL
#define bli_ddADDV_KERNEL BLIS_DADDV_KERNEL
#define bli_ccADDV_KERNEL BLIS_CADDV_KERNEL
#define bli_zzADDV_KERNEL BLIS_ZADDV_KERNEL
// axpyv kernels
#define bli_sssAXPYV_KERNEL BLIS_SAXPYV_KERNEL
#define bli_dddAXPYV_KERNEL BLIS_DAXPYV_KERNEL
#define bli_cccAXPYV_KERNEL BLIS_CAXPYV_KERNEL
#define bli_zzzAXPYV_KERNEL BLIS_ZAXPYV_KERNEL
// copyv kernels
#define bli_ssCOPYV_KERNEL BLIS_SCOPYV_KERNEL
#define bli_ddCOPYV_KERNEL BLIS_DCOPYV_KERNEL
#define bli_ccCOPYV_KERNEL BLIS_CCOPYV_KERNEL
#define bli_zzCOPYV_KERNEL BLIS_ZCOPYV_KERNEL
// dotv kernels
#define bli_sssDOTV_KERNEL BLIS_SDOTV_KERNEL
#define bli_dddDOTV_KERNEL BLIS_DDOTV_KERNEL
#define bli_cccDOTV_KERNEL BLIS_CDOTV_KERNEL
#define bli_zzzDOTV_KERNEL BLIS_ZDOTV_KERNEL
// dotxv kernels
#define bli_sssDOTXV_KERNEL BLIS_SDOTXV_KERNEL
#define bli_dddDOTXV_KERNEL BLIS_DDOTXV_KERNEL
#define bli_cccDOTXV_KERNEL BLIS_CDOTXV_KERNEL
#define bli_zzzDOTXV_KERNEL BLIS_ZDOTXV_KERNEL
// invertv kernels
#define bli_sINVERTV_KERNEL BLIS_SINVERTV_KERNEL
#define bli_dINVERTV_KERNEL BLIS_DINVERTV_KERNEL
#define bli_cINVERTV_KERNEL BLIS_CINVERTV_KERNEL
#define bli_zINVERTV_KERNEL BLIS_ZINVERTV_KERNEL
// scal2v kernels
#define bli_sssSCAL2V_KERNEL BLIS_SSCAL2V_KERNEL
#define bli_dddSCAL2V_KERNEL BLIS_DSCAL2V_KERNEL
#define bli_cccSCAL2V_KERNEL BLIS_CSCAL2V_KERNEL
#define bli_zzzSCAL2V_KERNEL BLIS_ZSCAL2V_KERNEL
// scalv kernels
#define bli_ssSCALV_KERNEL BLIS_SSCALV_KERNEL
#define bli_ddSCALV_KERNEL BLIS_DSCALV_KERNEL
#define bli_ccSCALV_KERNEL BLIS_CSCALV_KERNEL
#define bli_zzSCALV_KERNEL BLIS_ZSCALV_KERNEL
// setv kernels
#define bli_ssSETV_KERNEL BLIS_SSETV_KERNEL
#define bli_ddSETV_KERNEL BLIS_DSETV_KERNEL
#define bli_ccSETV_KERNEL BLIS_CSETV_KERNEL
#define bli_zzSETV_KERNEL BLIS_ZSETV_KERNEL
// subv kernels
#define bli_ssSUBV_KERNEL BLIS_SSUBV_KERNEL
#define bli_ddSUBV_KERNEL BLIS_DSUBV_KERNEL
#define bli_ccSUBV_KERNEL BLIS_CSUBV_KERNEL
#define bli_zzSUBV_KERNEL BLIS_ZSUBV_KERNEL
// swapv kernels
#define bli_ssSWAPV_KERNEL BLIS_SSWAPV_KERNEL
#define bli_ddSWAPV_KERNEL BLIS_DSWAPV_KERNEL
#define bli_ccSWAPV_KERNEL BLIS_CSWAPV_KERNEL
#define bli_zzSWAPV_KERNEL BLIS_ZSWAPV_KERNEL
*/
// -- Maximum register blocksize search ----------------------------------------
//
@@ -43,13 +275,47 @@
// s, d: no 4m/3m alternative is consulted; the maximum is the default MR.
#define BLIS_MAX_DEFAULT_MR_S BLIS_DEFAULT_MR_S
#define BLIS_MAX_DEFAULT_MR_D BLIS_DEFAULT_MR_D
// c, z: the maximum is the larger of the regular MR and the 4m MR.
// NOTE: only the 4m values are compared because the 3m register
// blocksizes are assumed to be equal to the 4m ones.
// c: pick whichever of the two blocksizes is larger.
#if BLIS_DEFAULT_4M_MR_C > BLIS_DEFAULT_MR_C
#define BLIS_MAX_DEFAULT_MR_C BLIS_DEFAULT_4M_MR_C
#else
#define BLIS_MAX_DEFAULT_MR_C BLIS_DEFAULT_MR_C
#endif
// z: pick whichever of the two blocksizes is larger.
#if BLIS_DEFAULT_4M_MR_Z > BLIS_DEFAULT_MR_Z
#define BLIS_MAX_DEFAULT_MR_Z BLIS_DEFAULT_4M_MR_Z
#else
#define BLIS_MAX_DEFAULT_MR_Z BLIS_DEFAULT_MR_Z
#endif
//
// Find the largest register blocksize NR.
//
// s, d: no 4m/3m alternative is consulted; the maximum is the default NR.
#define BLIS_MAX_DEFAULT_NR_S BLIS_DEFAULT_NR_S
#define BLIS_MAX_DEFAULT_NR_D BLIS_DEFAULT_NR_D
// c, z: the maximum is the larger of the regular NR and the 4m NR.
// NOTE: only the 4m values are compared because the 3m register
// blocksizes are assumed to be equal to the 4m ones.
// c: pick whichever of the two blocksizes is larger.
#if BLIS_DEFAULT_4M_NR_C > BLIS_DEFAULT_NR_C
#define BLIS_MAX_DEFAULT_NR_C BLIS_DEFAULT_4M_NR_C
#else
#define BLIS_MAX_DEFAULT_NR_C BLIS_DEFAULT_NR_C
#endif
// z: pick whichever of the two blocksizes is larger.
#if BLIS_DEFAULT_4M_NR_Z > BLIS_DEFAULT_NR_Z
#define BLIS_MAX_DEFAULT_NR_Z BLIS_DEFAULT_4M_NR_Z
#else
#define BLIS_MAX_DEFAULT_NR_Z BLIS_DEFAULT_NR_Z
#endif
// -- Abbreviated macros -------------------------------------------------------

View File

@@ -0,0 +1,492 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_KERNEL_PRE_MACRO_DEFS_H
#define BLIS_KERNEL_PRE_MACRO_DEFS_H
// -- Reference kernel definitions ---------------------------------------------
//
// Each BLIS_<CH><KERNEL>_REF macro in this file expands to the name of the
// reference implementation of <KERNEL> for one datatype character (s, d, c,
// or z). Only names are bound here; no function is declared or defined.
//
// Level-3
//
// All five level-3 micro-kernels have a reference name for every datatype.
// gemm micro-kernels
#define BLIS_SGEMM_UKERNEL_REF bli_sgemm_ukr_ref
#define BLIS_DGEMM_UKERNEL_REF bli_dgemm_ukr_ref
#define BLIS_CGEMM_UKERNEL_REF bli_cgemm_ukr_ref
#define BLIS_ZGEMM_UKERNEL_REF bli_zgemm_ukr_ref
// gemmtrsm_l micro-kernels
#define BLIS_SGEMMTRSM_L_UKERNEL_REF bli_sgemmtrsm_l_ukr_ref
#define BLIS_DGEMMTRSM_L_UKERNEL_REF bli_dgemmtrsm_l_ukr_ref
#define BLIS_CGEMMTRSM_L_UKERNEL_REF bli_cgemmtrsm_l_ukr_ref
#define BLIS_ZGEMMTRSM_L_UKERNEL_REF bli_zgemmtrsm_l_ukr_ref
// gemmtrsm_u micro-kernels
#define BLIS_SGEMMTRSM_U_UKERNEL_REF bli_sgemmtrsm_u_ukr_ref
#define BLIS_DGEMMTRSM_U_UKERNEL_REF bli_dgemmtrsm_u_ukr_ref
#define BLIS_CGEMMTRSM_U_UKERNEL_REF bli_cgemmtrsm_u_ukr_ref
#define BLIS_ZGEMMTRSM_U_UKERNEL_REF bli_zgemmtrsm_u_ukr_ref
// trsm_l micro-kernels
#define BLIS_STRSM_L_UKERNEL_REF bli_strsm_l_ukr_ref
#define BLIS_DTRSM_L_UKERNEL_REF bli_dtrsm_l_ukr_ref
#define BLIS_CTRSM_L_UKERNEL_REF bli_ctrsm_l_ukr_ref
#define BLIS_ZTRSM_L_UKERNEL_REF bli_ztrsm_l_ukr_ref
// trsm_u micro-kernels
#define BLIS_STRSM_U_UKERNEL_REF bli_strsm_u_ukr_ref
#define BLIS_DTRSM_U_UKERNEL_REF bli_dtrsm_u_ukr_ref
#define BLIS_CTRSM_U_UKERNEL_REF bli_ctrsm_u_ukr_ref
#define BLIS_ZTRSM_U_UKERNEL_REF bli_ztrsm_u_ukr_ref
//
// Level-3 4m
//
// Names of the reference 4m micro-kernels. Only the complex datatype
// characters (c, z) are given 4m reference names; there are no s or d entries.
// gemm4m micro-kernels
#define BLIS_CGEMM4M_UKERNEL_REF bli_cgemm4m_ukr_ref
#define BLIS_ZGEMM4M_UKERNEL_REF bli_zgemm4m_ukr_ref
// gemmtrsm4m_l micro-kernels
#define BLIS_CGEMMTRSM4M_L_UKERNEL_REF bli_cgemmtrsm4m_l_ukr_ref
#define BLIS_ZGEMMTRSM4M_L_UKERNEL_REF bli_zgemmtrsm4m_l_ukr_ref
// gemmtrsm4m_u micro-kernels
#define BLIS_CGEMMTRSM4M_U_UKERNEL_REF bli_cgemmtrsm4m_u_ukr_ref
#define BLIS_ZGEMMTRSM4M_U_UKERNEL_REF bli_zgemmtrsm4m_u_ukr_ref
// trsm4m_l micro-kernels
#define BLIS_CTRSM4M_L_UKERNEL_REF bli_ctrsm4m_l_ukr_ref
#define BLIS_ZTRSM4M_L_UKERNEL_REF bli_ztrsm4m_l_ukr_ref
// trsm4m_u micro-kernels
#define BLIS_CTRSM4M_U_UKERNEL_REF bli_ctrsm4m_u_ukr_ref
#define BLIS_ZTRSM4M_U_UKERNEL_REF bli_ztrsm4m_u_ukr_ref
//
// Level-3 3m
//
// Names of the reference 3m micro-kernels. Only the complex datatype
// characters (c, z) are given 3m reference names; there are no s or d entries.
// gemm3m micro-kernels
#define BLIS_CGEMM3M_UKERNEL_REF bli_cgemm3m_ukr_ref
#define BLIS_ZGEMM3M_UKERNEL_REF bli_zgemm3m_ukr_ref
// gemmtrsm3m_l micro-kernels
#define BLIS_CGEMMTRSM3M_L_UKERNEL_REF bli_cgemmtrsm3m_l_ukr_ref
#define BLIS_ZGEMMTRSM3M_L_UKERNEL_REF bli_zgemmtrsm3m_l_ukr_ref
// gemmtrsm3m_u micro-kernels
#define BLIS_CGEMMTRSM3M_U_UKERNEL_REF bli_cgemmtrsm3m_u_ukr_ref
#define BLIS_ZGEMMTRSM3M_U_UKERNEL_REF bli_zgemmtrsm3m_u_ukr_ref
// trsm3m_l micro-kernels
#define BLIS_CTRSM3M_L_UKERNEL_REF bli_ctrsm3m_l_ukr_ref
#define BLIS_ZTRSM3M_L_UKERNEL_REF bli_ztrsm3m_l_ukr_ref
// trsm3m_u micro-kernels
#define BLIS_CTRSM3M_U_UKERNEL_REF bli_ctrsm3m_u_ukr_ref
#define BLIS_ZTRSM3M_U_UKERNEL_REF bli_ztrsm3m_u_ukr_ref
//
// Level-1m
//
// Names of the reference packm kernels. One name is bound per datatype
// character (s, d, c, z) for each of the shapes 2xk, 4xk, ..., 16xk; only
// even leading dimensions from 2 through 16 are listed.
// packm_2xk kernels
#define BLIS_SPACKM_2XK_KERNEL_REF bli_spackm_ref_2xk
#define BLIS_DPACKM_2XK_KERNEL_REF bli_dpackm_ref_2xk
#define BLIS_CPACKM_2XK_KERNEL_REF bli_cpackm_ref_2xk
#define BLIS_ZPACKM_2XK_KERNEL_REF bli_zpackm_ref_2xk
// packm_4xk kernels
#define BLIS_SPACKM_4XK_KERNEL_REF bli_spackm_ref_4xk
#define BLIS_DPACKM_4XK_KERNEL_REF bli_dpackm_ref_4xk
#define BLIS_CPACKM_4XK_KERNEL_REF bli_cpackm_ref_4xk
#define BLIS_ZPACKM_4XK_KERNEL_REF bli_zpackm_ref_4xk
// packm_6xk kernels
#define BLIS_SPACKM_6XK_KERNEL_REF bli_spackm_ref_6xk
#define BLIS_DPACKM_6XK_KERNEL_REF bli_dpackm_ref_6xk
#define BLIS_CPACKM_6XK_KERNEL_REF bli_cpackm_ref_6xk
#define BLIS_ZPACKM_6XK_KERNEL_REF bli_zpackm_ref_6xk
// packm_8xk kernels
#define BLIS_SPACKM_8XK_KERNEL_REF bli_spackm_ref_8xk
#define BLIS_DPACKM_8XK_KERNEL_REF bli_dpackm_ref_8xk
#define BLIS_CPACKM_8XK_KERNEL_REF bli_cpackm_ref_8xk
#define BLIS_ZPACKM_8XK_KERNEL_REF bli_zpackm_ref_8xk
// packm_10xk kernels
#define BLIS_SPACKM_10XK_KERNEL_REF bli_spackm_ref_10xk
#define BLIS_DPACKM_10XK_KERNEL_REF bli_dpackm_ref_10xk
#define BLIS_CPACKM_10XK_KERNEL_REF bli_cpackm_ref_10xk
#define BLIS_ZPACKM_10XK_KERNEL_REF bli_zpackm_ref_10xk
// packm_12xk kernels
#define BLIS_SPACKM_12XK_KERNEL_REF bli_spackm_ref_12xk
#define BLIS_DPACKM_12XK_KERNEL_REF bli_dpackm_ref_12xk
#define BLIS_CPACKM_12XK_KERNEL_REF bli_cpackm_ref_12xk
#define BLIS_ZPACKM_12XK_KERNEL_REF bli_zpackm_ref_12xk
// packm_14xk kernels
#define BLIS_SPACKM_14XK_KERNEL_REF bli_spackm_ref_14xk
#define BLIS_DPACKM_14XK_KERNEL_REF bli_dpackm_ref_14xk
#define BLIS_CPACKM_14XK_KERNEL_REF bli_cpackm_ref_14xk
#define BLIS_ZPACKM_14XK_KERNEL_REF bli_zpackm_ref_14xk
// packm_16xk kernels
#define BLIS_SPACKM_16XK_KERNEL_REF bli_spackm_ref_16xk
#define BLIS_DPACKM_16XK_KERNEL_REF bli_dpackm_ref_16xk
#define BLIS_CPACKM_16XK_KERNEL_REF bli_cpackm_ref_16xk
#define BLIS_ZPACKM_16XK_KERNEL_REF bli_zpackm_ref_16xk
// Names of the reference packm "_ri" kernels, for the same shapes (2xk
// through 16xk) and datatype characters (s, d, c, z) as the plain packm
// kernels.
// NOTE(review): "_ri" presumably denotes the packing format consumed by the
// 4m micro-kernels -- confirm against the packm_ref_*_ri implementations.
// packm_2xk_ri kernels
#define BLIS_SPACKM_2XK_RI_KERNEL_REF bli_spackm_ref_2xk_ri
#define BLIS_DPACKM_2XK_RI_KERNEL_REF bli_dpackm_ref_2xk_ri
#define BLIS_CPACKM_2XK_RI_KERNEL_REF bli_cpackm_ref_2xk_ri
#define BLIS_ZPACKM_2XK_RI_KERNEL_REF bli_zpackm_ref_2xk_ri
// packm_4xk_ri kernels
#define BLIS_SPACKM_4XK_RI_KERNEL_REF bli_spackm_ref_4xk_ri
#define BLIS_DPACKM_4XK_RI_KERNEL_REF bli_dpackm_ref_4xk_ri
#define BLIS_CPACKM_4XK_RI_KERNEL_REF bli_cpackm_ref_4xk_ri
#define BLIS_ZPACKM_4XK_RI_KERNEL_REF bli_zpackm_ref_4xk_ri
// packm_6xk_ri kernels
#define BLIS_SPACKM_6XK_RI_KERNEL_REF bli_spackm_ref_6xk_ri
#define BLIS_DPACKM_6XK_RI_KERNEL_REF bli_dpackm_ref_6xk_ri
#define BLIS_CPACKM_6XK_RI_KERNEL_REF bli_cpackm_ref_6xk_ri
#define BLIS_ZPACKM_6XK_RI_KERNEL_REF bli_zpackm_ref_6xk_ri
// packm_8xk_ri kernels
#define BLIS_SPACKM_8XK_RI_KERNEL_REF bli_spackm_ref_8xk_ri
#define BLIS_DPACKM_8XK_RI_KERNEL_REF bli_dpackm_ref_8xk_ri
#define BLIS_CPACKM_8XK_RI_KERNEL_REF bli_cpackm_ref_8xk_ri
#define BLIS_ZPACKM_8XK_RI_KERNEL_REF bli_zpackm_ref_8xk_ri
// packm_10xk_ri kernels
#define BLIS_SPACKM_10XK_RI_KERNEL_REF bli_spackm_ref_10xk_ri
#define BLIS_DPACKM_10XK_RI_KERNEL_REF bli_dpackm_ref_10xk_ri
#define BLIS_CPACKM_10XK_RI_KERNEL_REF bli_cpackm_ref_10xk_ri
#define BLIS_ZPACKM_10XK_RI_KERNEL_REF bli_zpackm_ref_10xk_ri
// packm_12xk_ri kernels
#define BLIS_SPACKM_12XK_RI_KERNEL_REF bli_spackm_ref_12xk_ri
#define BLIS_DPACKM_12XK_RI_KERNEL_REF bli_dpackm_ref_12xk_ri
#define BLIS_CPACKM_12XK_RI_KERNEL_REF bli_cpackm_ref_12xk_ri
#define BLIS_ZPACKM_12XK_RI_KERNEL_REF bli_zpackm_ref_12xk_ri
// packm_14xk_ri kernels
#define BLIS_SPACKM_14XK_RI_KERNEL_REF bli_spackm_ref_14xk_ri
#define BLIS_DPACKM_14XK_RI_KERNEL_REF bli_dpackm_ref_14xk_ri
#define BLIS_CPACKM_14XK_RI_KERNEL_REF bli_cpackm_ref_14xk_ri
#define BLIS_ZPACKM_14XK_RI_KERNEL_REF bli_zpackm_ref_14xk_ri
// packm_16xk_ri kernels
#define BLIS_SPACKM_16XK_RI_KERNEL_REF bli_spackm_ref_16xk_ri
#define BLIS_DPACKM_16XK_RI_KERNEL_REF bli_dpackm_ref_16xk_ri
#define BLIS_CPACKM_16XK_RI_KERNEL_REF bli_cpackm_ref_16xk_ri
#define BLIS_ZPACKM_16XK_RI_KERNEL_REF bli_zpackm_ref_16xk_ri
// Names of the reference packm "_ri3" kernels, for the same shapes (2xk
// through 16xk) and datatype characters (s, d, c, z) as the plain packm
// kernels.
// NOTE(review): "_ri3" presumably denotes the packing format consumed by the
// 3m micro-kernels -- confirm against the packm_ref_*_ri3 implementations.
// packm_2xk_ri3 kernels
#define BLIS_SPACKM_2XK_RI3_KERNEL_REF bli_spackm_ref_2xk_ri3
#define BLIS_DPACKM_2XK_RI3_KERNEL_REF bli_dpackm_ref_2xk_ri3
#define BLIS_CPACKM_2XK_RI3_KERNEL_REF bli_cpackm_ref_2xk_ri3
#define BLIS_ZPACKM_2XK_RI3_KERNEL_REF bli_zpackm_ref_2xk_ri3
// packm_4xk_ri3 kernels
#define BLIS_SPACKM_4XK_RI3_KERNEL_REF bli_spackm_ref_4xk_ri3
#define BLIS_DPACKM_4XK_RI3_KERNEL_REF bli_dpackm_ref_4xk_ri3
#define BLIS_CPACKM_4XK_RI3_KERNEL_REF bli_cpackm_ref_4xk_ri3
#define BLIS_ZPACKM_4XK_RI3_KERNEL_REF bli_zpackm_ref_4xk_ri3
// packm_6xk_ri3 kernels
#define BLIS_SPACKM_6XK_RI3_KERNEL_REF bli_spackm_ref_6xk_ri3
#define BLIS_DPACKM_6XK_RI3_KERNEL_REF bli_dpackm_ref_6xk_ri3
#define BLIS_CPACKM_6XK_RI3_KERNEL_REF bli_cpackm_ref_6xk_ri3
#define BLIS_ZPACKM_6XK_RI3_KERNEL_REF bli_zpackm_ref_6xk_ri3
// packm_8xk_ri3 kernels
#define BLIS_SPACKM_8XK_RI3_KERNEL_REF bli_spackm_ref_8xk_ri3
#define BLIS_DPACKM_8XK_RI3_KERNEL_REF bli_dpackm_ref_8xk_ri3
#define BLIS_CPACKM_8XK_RI3_KERNEL_REF bli_cpackm_ref_8xk_ri3
#define BLIS_ZPACKM_8XK_RI3_KERNEL_REF bli_zpackm_ref_8xk_ri3
// packm_10xk_ri3 kernels
#define BLIS_SPACKM_10XK_RI3_KERNEL_REF bli_spackm_ref_10xk_ri3
#define BLIS_DPACKM_10XK_RI3_KERNEL_REF bli_dpackm_ref_10xk_ri3
#define BLIS_CPACKM_10XK_RI3_KERNEL_REF bli_cpackm_ref_10xk_ri3
#define BLIS_ZPACKM_10XK_RI3_KERNEL_REF bli_zpackm_ref_10xk_ri3
// packm_12xk_ri3 kernels
#define BLIS_SPACKM_12XK_RI3_KERNEL_REF bli_spackm_ref_12xk_ri3
#define BLIS_DPACKM_12XK_RI3_KERNEL_REF bli_dpackm_ref_12xk_ri3
#define BLIS_CPACKM_12XK_RI3_KERNEL_REF bli_cpackm_ref_12xk_ri3
#define BLIS_ZPACKM_12XK_RI3_KERNEL_REF bli_zpackm_ref_12xk_ri3
// packm_14xk_ri3 kernels
#define BLIS_SPACKM_14XK_RI3_KERNEL_REF bli_spackm_ref_14xk_ri3
#define BLIS_DPACKM_14XK_RI3_KERNEL_REF bli_dpackm_ref_14xk_ri3
#define BLIS_CPACKM_14XK_RI3_KERNEL_REF bli_cpackm_ref_14xk_ri3
#define BLIS_ZPACKM_14XK_RI3_KERNEL_REF bli_zpackm_ref_14xk_ri3
// packm_16xk_ri3 kernels
#define BLIS_SPACKM_16XK_RI3_KERNEL_REF bli_spackm_ref_16xk_ri3
#define BLIS_DPACKM_16XK_RI3_KERNEL_REF bli_dpackm_ref_16xk_ri3
#define BLIS_CPACKM_16XK_RI3_KERNEL_REF bli_cpackm_ref_16xk_ri3
#define BLIS_ZPACKM_16XK_RI3_KERNEL_REF bli_zpackm_ref_16xk_ri3
// Names of the reference unpackm kernels, for the shapes 2xk through 16xk
// and every datatype character (s, d, c, z). No "_ri" or "_ri3" unpackm
// names are bound in this section.
// unpackm_2xk kernels
#define BLIS_SUNPACKM_2XK_KERNEL_REF bli_sunpackm_ref_2xk
#define BLIS_DUNPACKM_2XK_KERNEL_REF bli_dunpackm_ref_2xk
#define BLIS_CUNPACKM_2XK_KERNEL_REF bli_cunpackm_ref_2xk
#define BLIS_ZUNPACKM_2XK_KERNEL_REF bli_zunpackm_ref_2xk
// unpackm_4xk kernels
#define BLIS_SUNPACKM_4XK_KERNEL_REF bli_sunpackm_ref_4xk
#define BLIS_DUNPACKM_4XK_KERNEL_REF bli_dunpackm_ref_4xk
#define BLIS_CUNPACKM_4XK_KERNEL_REF bli_cunpackm_ref_4xk
#define BLIS_ZUNPACKM_4XK_KERNEL_REF bli_zunpackm_ref_4xk
// unpackm_6xk kernels
#define BLIS_SUNPACKM_6XK_KERNEL_REF bli_sunpackm_ref_6xk
#define BLIS_DUNPACKM_6XK_KERNEL_REF bli_dunpackm_ref_6xk
#define BLIS_CUNPACKM_6XK_KERNEL_REF bli_cunpackm_ref_6xk
#define BLIS_ZUNPACKM_6XK_KERNEL_REF bli_zunpackm_ref_6xk
// unpackm_8xk kernels
#define BLIS_SUNPACKM_8XK_KERNEL_REF bli_sunpackm_ref_8xk
#define BLIS_DUNPACKM_8XK_KERNEL_REF bli_dunpackm_ref_8xk
#define BLIS_CUNPACKM_8XK_KERNEL_REF bli_cunpackm_ref_8xk
#define BLIS_ZUNPACKM_8XK_KERNEL_REF bli_zunpackm_ref_8xk
// unpackm_10xk kernels
#define BLIS_SUNPACKM_10XK_KERNEL_REF bli_sunpackm_ref_10xk
#define BLIS_DUNPACKM_10XK_KERNEL_REF bli_dunpackm_ref_10xk
#define BLIS_CUNPACKM_10XK_KERNEL_REF bli_cunpackm_ref_10xk
#define BLIS_ZUNPACKM_10XK_KERNEL_REF bli_zunpackm_ref_10xk
// unpackm_12xk kernels
#define BLIS_SUNPACKM_12XK_KERNEL_REF bli_sunpackm_ref_12xk
#define BLIS_DUNPACKM_12XK_KERNEL_REF bli_dunpackm_ref_12xk
#define BLIS_CUNPACKM_12XK_KERNEL_REF bli_cunpackm_ref_12xk
#define BLIS_ZUNPACKM_12XK_KERNEL_REF bli_zunpackm_ref_12xk
// unpackm_14xk kernels
#define BLIS_SUNPACKM_14XK_KERNEL_REF bli_sunpackm_ref_14xk
#define BLIS_DUNPACKM_14XK_KERNEL_REF bli_dunpackm_ref_14xk
#define BLIS_CUNPACKM_14XK_KERNEL_REF bli_cunpackm_ref_14xk
#define BLIS_ZUNPACKM_14XK_KERNEL_REF bli_zunpackm_ref_14xk
// unpackm_16xk kernels
#define BLIS_SUNPACKM_16XK_KERNEL_REF bli_sunpackm_ref_16xk
#define BLIS_DUNPACKM_16XK_KERNEL_REF bli_dunpackm_ref_16xk
#define BLIS_CUNPACKM_16XK_KERNEL_REF bli_cunpackm_ref_16xk
#define BLIS_ZUNPACKM_16XK_KERNEL_REF bli_zunpackm_ref_16xk
//
// Level-1f
//
// Names of the reference level-1f kernels. The reference function names
// carry three datatype characters (sss, ddd, ccc, zzz) while the macro names
// carry one, so e.g. "bli_ddddotxf_ref" reads as bli_ + ddd + dotxf + _ref.
// axpy2v kernels
#define BLIS_SAXPY2V_KERNEL_REF bli_sssaxpy2v_ref
#define BLIS_DAXPY2V_KERNEL_REF bli_dddaxpy2v_ref
#define BLIS_CAXPY2V_KERNEL_REF bli_cccaxpy2v_ref
#define BLIS_ZAXPY2V_KERNEL_REF bli_zzzaxpy2v_ref
// dotaxpyv kernels
#define BLIS_SDOTAXPYV_KERNEL_REF bli_sssdotaxpyv_ref
#define BLIS_DDOTAXPYV_KERNEL_REF bli_ddddotaxpyv_ref
#define BLIS_CDOTAXPYV_KERNEL_REF bli_cccdotaxpyv_ref
#define BLIS_ZDOTAXPYV_KERNEL_REF bli_zzzdotaxpyv_ref
// axpyf kernels
#define BLIS_SAXPYF_KERNEL_REF bli_sssaxpyf_ref
#define BLIS_DAXPYF_KERNEL_REF bli_dddaxpyf_ref
#define BLIS_CAXPYF_KERNEL_REF bli_cccaxpyf_ref
#define BLIS_ZAXPYF_KERNEL_REF bli_zzzaxpyf_ref
// dotxf kernels
#define BLIS_SDOTXF_KERNEL_REF bli_sssdotxf_ref
#define BLIS_DDOTXF_KERNEL_REF bli_ddddotxf_ref
#define BLIS_CDOTXF_KERNEL_REF bli_cccdotxf_ref
#define BLIS_ZDOTXF_KERNEL_REF bli_zzzdotxf_ref
// dotxaxpyf kernels
// Two reference variants are named here; the "_var1" bindings are kept
// commented out and the "_var2" bindings are the active ones.
//#define BLIS_SDOTXAXPYF_KERNEL_REF bli_sssdotxaxpyf_ref_var1
//#define BLIS_DDOTXAXPYF_KERNEL_REF bli_ddddotxaxpyf_ref_var1
//#define BLIS_CDOTXAXPYF_KERNEL_REF bli_cccdotxaxpyf_ref_var1
//#define BLIS_ZDOTXAXPYF_KERNEL_REF bli_zzzdotxaxpyf_ref_var1
#define BLIS_SDOTXAXPYF_KERNEL_REF bli_sssdotxaxpyf_ref_var2
#define BLIS_DDOTXAXPYF_KERNEL_REF bli_ddddotxaxpyf_ref_var2
#define BLIS_CDOTXAXPYF_KERNEL_REF bli_cccdotxaxpyf_ref_var2
#define BLIS_ZDOTXAXPYF_KERNEL_REF bli_zzzdotxaxpyf_ref_var2
//
// Level-1v
//
// Names of the reference level-1v kernels. The number of datatype characters
// at the front of the reference function name varies by operation:
//   one   - invertv
//   two   - addv, copyv, scalv, setv, subv, swapv
//   three - axpyv, dotv, dotxv, scal2v
// so e.g. "bli_sssetv_ref" reads as bli_ + ss + setv + _ref, and
// "bli_sssscal2v_ref" as bli_ + sss + scal2v + _ref.
// addv kernels
#define BLIS_SADDV_KERNEL_REF bli_ssaddv_ref
#define BLIS_DADDV_KERNEL_REF bli_ddaddv_ref
#define BLIS_CADDV_KERNEL_REF bli_ccaddv_ref
#define BLIS_ZADDV_KERNEL_REF bli_zzaddv_ref
// axpyv kernels
#define BLIS_SAXPYV_KERNEL_REF bli_sssaxpyv_ref
#define BLIS_DAXPYV_KERNEL_REF bli_dddaxpyv_ref
#define BLIS_CAXPYV_KERNEL_REF bli_cccaxpyv_ref
#define BLIS_ZAXPYV_KERNEL_REF bli_zzzaxpyv_ref
// copyv kernels
#define BLIS_SCOPYV_KERNEL_REF bli_sscopyv_ref
#define BLIS_DCOPYV_KERNEL_REF bli_ddcopyv_ref
#define BLIS_CCOPYV_KERNEL_REF bli_cccopyv_ref
#define BLIS_ZCOPYV_KERNEL_REF bli_zzcopyv_ref
// dotv kernels
#define BLIS_SDOTV_KERNEL_REF bli_sssdotv_ref
#define BLIS_DDOTV_KERNEL_REF bli_ddddotv_ref
#define BLIS_CDOTV_KERNEL_REF bli_cccdotv_ref
#define BLIS_ZDOTV_KERNEL_REF bli_zzzdotv_ref
// dotxv kernels
#define BLIS_SDOTXV_KERNEL_REF bli_sssdotxv_ref
#define BLIS_DDOTXV_KERNEL_REF bli_ddddotxv_ref
#define BLIS_CDOTXV_KERNEL_REF bli_cccdotxv_ref
#define BLIS_ZDOTXV_KERNEL_REF bli_zzzdotxv_ref
// invertv kernels
#define BLIS_SINVERTV_KERNEL_REF bli_sinvertv_ref
#define BLIS_DINVERTV_KERNEL_REF bli_dinvertv_ref
#define BLIS_CINVERTV_KERNEL_REF bli_cinvertv_ref
#define BLIS_ZINVERTV_KERNEL_REF bli_zinvertv_ref
// scal2v kernels
#define BLIS_SSCAL2V_KERNEL_REF bli_sssscal2v_ref
#define BLIS_DSCAL2V_KERNEL_REF bli_dddscal2v_ref
#define BLIS_CSCAL2V_KERNEL_REF bli_cccscal2v_ref
#define BLIS_ZSCAL2V_KERNEL_REF bli_zzzscal2v_ref
// scalv kernels
#define BLIS_SSCALV_KERNEL_REF bli_ssscalv_ref
#define BLIS_DSCALV_KERNEL_REF bli_ddscalv_ref
#define BLIS_CSCALV_KERNEL_REF bli_ccscalv_ref
#define BLIS_ZSCALV_KERNEL_REF bli_zzscalv_ref
// setv kernels
#define BLIS_SSETV_KERNEL_REF bli_sssetv_ref
#define BLIS_DSETV_KERNEL_REF bli_ddsetv_ref
#define BLIS_CSETV_KERNEL_REF bli_ccsetv_ref
#define BLIS_ZSETV_KERNEL_REF bli_zzsetv_ref
// subv kernels
#define BLIS_SSUBV_KERNEL_REF bli_sssubv_ref
#define BLIS_DSUBV_KERNEL_REF bli_ddsubv_ref
#define BLIS_CSUBV_KERNEL_REF bli_ccsubv_ref
#define BLIS_ZSUBV_KERNEL_REF bli_zzsubv_ref
// swapv kernels
#define BLIS_SSWAPV_KERNEL_REF bli_ssswapv_ref
#define BLIS_DSWAPV_KERNEL_REF bli_ddswapv_ref
#define BLIS_CSWAPV_KERNEL_REF bli_ccswapv_ref
#define BLIS_ZSWAPV_KERNEL_REF bli_zzswapv_ref
#endif

View File

@@ -0,0 +1,741 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_KERNEL_PROTOTYPES_H
#define BLIS_KERNEL_PROTOTYPES_H
// -- Define PASTEMAC-friendly kernel function name macros ---------------------
//
// Each kernel section below has the same three parts:
//   1. bli_<ch><KERNEL> aliases that forward to the BLIS_<CH><KERNEL> macros
//      (which are defined elsewhere);
//   2. a GENTPROT* template describing the kernel's prototype;
//   3. an INSERT_GENTPROT*_BASIC line that instantiates the template.
// NOTE(review): PASTEMAC(ch,kername) presumably pastes bli_ ## ch ## kername,
// which is what makes the aliases in (1) necessary -- confirm against the
// PASTEMAC definition, which is not shown here.
//
// Level-3
//
// gemm micro-kernels
#define bli_sGEMM_UKERNEL BLIS_SGEMM_UKERNEL
#define bli_dGEMM_UKERNEL BLIS_DGEMM_UKERNEL
#define bli_cGEMM_UKERNEL BLIS_CGEMM_UKERNEL
#define bli_zGEMM_UKERNEL BLIS_ZGEMM_UKERNEL
// Prototype template for the gemm micro-kernel. Every operand pointer
// (alpha, a, b, beta, c) has the single type ctype and is restrict-qualified;
// c is the only operand passed with strides (rs_c, cs_c). The trailing
// auxinfo_t* is not restrict-qualified.
#undef GENTPROT
#define GENTPROT( ctype, ch, kername ) \
\
void PASTEMAC(ch,kername) \
( \
dim_t k, \
ctype* restrict alpha, \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( GEMM_UKERNEL )
// gemmtrsm_l micro-kernels
// Aliases that let a name built from a datatype character plus
// GEMMTRSM_L_UKERNEL resolve to the corresponding BLIS_<CH>GEMMTRSM_L_UKERNEL.
#define bli_sGEMMTRSM_L_UKERNEL BLIS_SGEMMTRSM_L_UKERNEL
#define bli_dGEMMTRSM_L_UKERNEL BLIS_DGEMMTRSM_L_UKERNEL
#define bli_cGEMMTRSM_L_UKERNEL BLIS_CGEMMTRSM_L_UKERNEL
#define bli_zGEMMTRSM_L_UKERNEL BLIS_ZGEMMTRSM_L_UKERNEL
// Prototype template for the fused gemmtrsm_l micro-kernel. It takes no
// beta argument; the operands are a10, a11, b01 and b11 (all ctype, all
// restrict-qualified) plus c11 with its strides rs_c and cs_c.
#undef GENTPROT
#define GENTPROT( ctype, ch, kername ) \
\
void PASTEMAC(ch,kername) \
( \
dim_t k, \
ctype* restrict alpha, \
ctype* restrict a10, \
ctype* restrict a11, \
ctype* restrict b01, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( GEMMTRSM_L_UKERNEL )
// gemmtrsm_u micro-kernels
// Aliases that let a name built from a datatype character plus
// GEMMTRSM_U_UKERNEL resolve to the corresponding BLIS_<CH>GEMMTRSM_U_UKERNEL.
#define bli_sGEMMTRSM_U_UKERNEL BLIS_SGEMMTRSM_U_UKERNEL
#define bli_dGEMMTRSM_U_UKERNEL BLIS_DGEMMTRSM_U_UKERNEL
#define bli_cGEMMTRSM_U_UKERNEL BLIS_CGEMMTRSM_U_UKERNEL
#define bli_zGEMMTRSM_U_UKERNEL BLIS_ZGEMMTRSM_U_UKERNEL
// Prototype template for the fused gemmtrsm_u micro-kernel. The parameter
// types and order match the gemmtrsm_l template; only the names of the
// third and fifth parameters differ (a12 and b21 here, a10 and b01 there).
#undef GENTPROT
#define GENTPROT( ctype, ch, kername ) \
\
void PASTEMAC(ch,kername) \
( \
dim_t k, \
ctype* restrict alpha, \
ctype* restrict a12, \
ctype* restrict a11, \
ctype* restrict b21, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( GEMMTRSM_U_UKERNEL )
// trsm_l micro-kernels
// Aliases that let a name built from a datatype character plus
// TRSM_L_UKERNEL resolve to the corresponding BLIS_<CH>TRSM_L_UKERNEL.
#define bli_sTRSM_L_UKERNEL BLIS_STRSM_L_UKERNEL
#define bli_dTRSM_L_UKERNEL BLIS_DTRSM_L_UKERNEL
#define bli_cTRSM_L_UKERNEL BLIS_CTRSM_L_UKERNEL
#define bli_zTRSM_L_UKERNEL BLIS_ZTRSM_L_UKERNEL
// Prototype template for the trsm_l micro-kernel. Unlike the gemm and
// gemmtrsm templates it has no k and no alpha: only a11, b11, and c11 with
// its strides, followed by the auxinfo_t pointer.
#undef GENTPROT
#define GENTPROT( ctype, ch, kername ) \
\
void PASTEMAC(ch,kername) \
( \
ctype* restrict a11, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( TRSM_L_UKERNEL )
// trsm_u micro-kernels
// Aliases that let a name built from a datatype character plus
// TRSM_U_UKERNEL resolve to the corresponding BLIS_<CH>TRSM_U_UKERNEL.
#define bli_sTRSM_U_UKERNEL BLIS_STRSM_U_UKERNEL
#define bli_dTRSM_U_UKERNEL BLIS_DTRSM_U_UKERNEL
#define bli_cTRSM_U_UKERNEL BLIS_CTRSM_U_UKERNEL
#define bli_zTRSM_U_UKERNEL BLIS_ZTRSM_U_UKERNEL
// Prototype template for the trsm_u micro-kernel. Its parameter list is
// identical to that of the trsm_l template: a11, b11, c11 with strides, and
// the auxinfo_t pointer.
#undef GENTPROT
#define GENTPROT( ctype, ch, kername ) \
\
void PASTEMAC(ch,kername) \
( \
ctype* restrict a11, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( TRSM_U_UKERNEL )
//
// Level-3 4m
//
// The 4m kernels are aliased for the complex datatype characters (c, z)
// only, and their templates use GENTPROTCO, which additionally receives the
// real-projection type and character (ctype_r, chr).
// gemm4m micro-kernels
#define bli_cGEMM4M_UKERNEL BLIS_CGEMM4M_UKERNEL
#define bli_zGEMM4M_UKERNEL BLIS_ZGEMM4M_UKERNEL
// Prototype template for the gemm4m micro-kernel. The parameter list is the
// same as the gemm micro-kernel's; ctype_r and chr are accepted by the
// template but do not appear in the declaration.
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
\
void PASTEMAC(ch,kername) \
( \
dim_t k, \
ctype* restrict alpha, \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROTCO_BASIC( GEMM4M_UKERNEL )
// gemmtrsm4m_l micro-kernels
// Aliases for the complex datatype characters (c, z) only.
#define bli_cGEMMTRSM4M_L_UKERNEL BLIS_CGEMMTRSM4M_L_UKERNEL
#define bli_zGEMMTRSM4M_L_UKERNEL BLIS_ZGEMMTRSM4M_L_UKERNEL
// Prototype template for the gemmtrsm4m_l micro-kernel. All operand
// pointers are declared with the complex type ctype; ctype_r and chr are
// accepted by the template but do not appear in the declaration.
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
\
void PASTEMAC(ch,kername) \
( \
dim_t k, \
ctype* restrict alpha, \
ctype* restrict a10, \
ctype* restrict a11, \
ctype* restrict b01, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROTCO_BASIC( GEMMTRSM4M_L_UKERNEL )
// gemmtrsm4m_u micro-kernels
// Aliases for the complex datatype characters (c, z) only.
#define bli_cGEMMTRSM4M_U_UKERNEL BLIS_CGEMMTRSM4M_U_UKERNEL
#define bli_zGEMMTRSM4M_U_UKERNEL BLIS_ZGEMMTRSM4M_U_UKERNEL
// Prototype template for the gemmtrsm4m_u micro-kernel. Parameter types and
// order match the gemmtrsm4m_l template; the third and fifth parameters are
// named a12 and b21 here. ctype_r and chr are unused in the declaration.
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
\
void PASTEMAC(ch,kername) \
( \
dim_t k, \
ctype* restrict alpha, \
ctype* restrict a12, \
ctype* restrict a11, \
ctype* restrict b21, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROTCO_BASIC( GEMMTRSM4M_U_UKERNEL )
// trsm4m_l micro-kernels
// Aliases for the complex datatype characters (c, z) only.
#define bli_cTRSM4M_L_UKERNEL BLIS_CTRSM4M_L_UKERNEL
#define bli_zTRSM4M_L_UKERNEL BLIS_ZTRSM4M_L_UKERNEL
// Prototype template for the trsm4m_l micro-kernel. Unlike the gemm4m and
// gemmtrsm4m templates, a11r and b11r are declared with the real-projection
// type ctype_r, while c11 keeps the complex type ctype.
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
\
void PASTEMAC(ch,kername) \
( \
ctype_r* restrict a11r, \
ctype_r* restrict b11r, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROTCO_BASIC( TRSM4M_L_UKERNEL )
// trsm4m_u micro-kernels
// Aliases for the complex datatype characters (c, z) only.
#define bli_cTRSM4M_U_UKERNEL BLIS_CTRSM4M_U_UKERNEL
#define bli_zTRSM4M_U_UKERNEL BLIS_ZTRSM4M_U_UKERNEL
// Prototype template for the trsm4m_u micro-kernel; its parameter list is
// identical to the trsm4m_l template's (a11r and b11r of type ctype_r, c11
// of type ctype).
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
\
void PASTEMAC(ch,kername) \
( \
ctype_r* restrict a11r, \
ctype_r* restrict b11r, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROTCO_BASIC( TRSM4M_U_UKERNEL )
//
// Level-3 3m
//
// The 3m kernels are aliased for the complex datatype characters (c, z)
// only, and their templates use GENTPROTCO, which additionally receives the
// real-projection type and character (ctype_r, chr).
// gemm3m micro-kernels
#define bli_cGEMM3M_UKERNEL BLIS_CGEMM3M_UKERNEL
#define bli_zGEMM3M_UKERNEL BLIS_ZGEMM3M_UKERNEL
// Prototype template for the gemm3m micro-kernel. The parameter list is the
// same as the gemm micro-kernel's; ctype_r and chr are accepted by the
// template but do not appear in the declaration.
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
\
void PASTEMAC(ch,kername) \
( \
dim_t k, \
ctype* restrict alpha, \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROTCO_BASIC( GEMM3M_UKERNEL )
// gemmtrsm3m_l micro-kernels
// Aliases for the complex datatype characters (c, z) only.
#define bli_cGEMMTRSM3M_L_UKERNEL BLIS_CGEMMTRSM3M_L_UKERNEL
#define bli_zGEMMTRSM3M_L_UKERNEL BLIS_ZGEMMTRSM3M_L_UKERNEL
// Prototype template for the gemmtrsm3m_l micro-kernel. All operand
// pointers are declared with the complex type ctype; ctype_r and chr are
// accepted by the template but do not appear in the declaration.
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
\
void PASTEMAC(ch,kername) \
( \
dim_t k, \
ctype* restrict alpha, \
ctype* restrict a10, \
ctype* restrict a11, \
ctype* restrict b01, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROTCO_BASIC( GEMMTRSM3M_L_UKERNEL )
// gemmtrsm3m_u micro-kernels
// Aliases for the complex datatype characters (c, z) only.
#define bli_cGEMMTRSM3M_U_UKERNEL BLIS_CGEMMTRSM3M_U_UKERNEL
#define bli_zGEMMTRSM3M_U_UKERNEL BLIS_ZGEMMTRSM3M_U_UKERNEL
// Prototype template for the gemmtrsm3m_u micro-kernel. Parameter types and
// order match the gemmtrsm3m_l template; the third and fifth parameters are
// named a12 and b21 here. ctype_r and chr are unused in the declaration.
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
\
void PASTEMAC(ch,kername) \
( \
dim_t k, \
ctype* restrict alpha, \
ctype* restrict a12, \
ctype* restrict a11, \
ctype* restrict b21, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROTCO_BASIC( GEMMTRSM3M_U_UKERNEL )
// trsm3m_l micro-kernels
// Aliases for the complex datatype characters (c, z) only.
#define bli_cTRSM3M_L_UKERNEL BLIS_CTRSM3M_L_UKERNEL
#define bli_zTRSM3M_L_UKERNEL BLIS_ZTRSM3M_L_UKERNEL
// Prototype template for the trsm3m_l micro-kernel. Unlike the gemm3m and
// gemmtrsm3m templates, a11r and b11r are declared with the real-projection
// type ctype_r, while c11 keeps the complex type ctype.
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
\
void PASTEMAC(ch,kername) \
( \
ctype_r* restrict a11r, \
ctype_r* restrict b11r, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROTCO_BASIC( TRSM3M_L_UKERNEL )
// trsm3m_u micro-kernels
// Aliases for the complex datatype characters (c, z) only.
#define bli_cTRSM3M_U_UKERNEL BLIS_CTRSM3M_U_UKERNEL
#define bli_zTRSM3M_U_UKERNEL BLIS_ZTRSM3M_U_UKERNEL
// Prototype template for the trsm3m_u micro-kernel; its parameter list is
// identical to the trsm3m_l template's (a11r and b11r of type ctype_r, c11
// of type ctype).
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
\
void PASTEMAC(ch,kername) \
( \
ctype_r* restrict a11r, \
ctype_r* restrict b11r, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROTCO_BASIC( TRSM3M_U_UKERNEL )
//
// Level-1m
//
// NOTE: We don't need any PASTEMAC-friendly aliases to packm kernel
// macros because they are used directly in the initialization of the
// function pointer array, rather than via a templatizing wrapper macro.
//
// Level-1f
//
// The level-1f aliases carry three datatype characters (sss, ddd, ccc, zzz)
// and their templates are named through PASTEMAC3. Only the homogeneous
// combinations are aliased here.
// axpy2v kernels
#define bli_sssAXPY2V_KERNEL BLIS_SAXPY2V_KERNEL
#define bli_dddAXPY2V_KERNEL BLIS_DAXPY2V_KERNEL
#define bli_cccAXPY2V_KERNEL BLIS_CAXPY2V_KERNEL
#define bli_zzzAXPY2V_KERNEL BLIS_ZAXPY2V_KERNEL
// Prototype template for the axpy2v kernel. Both scalars (alpha1, alpha2)
// use the fourth template type ctype_xy; x, y and z each have their own type
// and increment. chxy is accepted by the template but does not appear in the
// declaration.
// NOTE(review): ctype_xy is presumably the type that unifies ctype_x and
// ctype_y -- confirm against the INSERT_GENTPROT3U12_BASIC definition.
#undef GENTPROT3U12
#define GENTPROT3U12( ctype_x, ctype_y, ctype_z, ctype_xy, chx, chy, chz, chxy, kername ) \
\
void PASTEMAC3(chx,chy,chz,kername) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype_xy* restrict alpha1, \
ctype_xy* restrict alpha2, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy, \
ctype_z* restrict z, inc_t incz \
);
INSERT_GENTPROT3U12_BASIC( AXPY2V_KERNEL )
// dotaxpyv kernels
// Aliases for the homogeneous datatype combinations (sss, ddd, ccc, zzz).
#define bli_sssDOTAXPYV_KERNEL BLIS_SDOTAXPYV_KERNEL
#define bli_dddDOTAXPYV_KERNEL BLIS_DDOTAXPYV_KERNEL
#define bli_cccDOTAXPYV_KERNEL BLIS_CDOTAXPYV_KERNEL
#define bli_zzzDOTAXPYV_KERNEL BLIS_ZDOTAXPYV_KERNEL
// Prototype template for the dotaxpyv kernel. It takes three conj_t flags
// (conjxt, conjx, conjy). alpha is declared with ctype_x, whereas rho is
// declared with the fourth template type ctype_xy. chxy is unused in the
// declaration.
#undef GENTPROT3U12
#define GENTPROT3U12( ctype_x, ctype_y, ctype_z, ctype_xy, chx, chy, chz, chxy, kername ) \
\
void PASTEMAC3(chx,chy,chz,kername) \
( \
conj_t conjxt, \
conj_t conjx, \
conj_t conjy, \
dim_t m, \
ctype_x* restrict alpha, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy, \
ctype_xy* restrict rho, \
ctype_z* restrict z, inc_t incz \
);
INSERT_GENTPROT3U12_BASIC( DOTAXPYV_KERNEL )
// axpyf kernels
// Aliases for the homogeneous datatype combinations (sss, ddd, ccc, zzz).
#define bli_sssAXPYF_KERNEL BLIS_SAXPYF_KERNEL
#define bli_dddAXPYF_KERNEL BLIS_DAXPYF_KERNEL
#define bli_cccAXPYF_KERNEL BLIS_CAXPYF_KERNEL
#define bli_zzzAXPYF_KERNEL BLIS_ZAXPYF_KERNEL
// Prototype template for the axpyf kernel. It takes two dimensions (m and
// b_n); a is passed with two strides (inca, lda) while x and y take one
// increment each. alpha uses the fourth template type ctype_ax; chax is
// unused in the declaration.
#undef GENTPROT3U12
#define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, kername ) \
\
void PASTEMAC3(cha,chx,chy,kername) \
( \
conj_t conja, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype_ax* restrict alpha, \
ctype_a* restrict a, inc_t inca, inc_t lda, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy \
);
INSERT_GENTPROT3U12_BASIC( AXPYF_KERNEL )
// dotxf kernels
// Aliases for the homogeneous datatype combinations (sss, ddd, ccc, zzz).
#define bli_sssDOTXF_KERNEL BLIS_SDOTXF_KERNEL
#define bli_dddDOTXF_KERNEL BLIS_DDOTXF_KERNEL
#define bli_cccDOTXF_KERNEL BLIS_CDOTXF_KERNEL
#define bli_zzzDOTXF_KERNEL BLIS_ZDOTXF_KERNEL
// Prototype template for the dotxf kernel. Compared with the axpyf
// template, the first flag is named conjat and a beta scalar of type ctype_y
// precedes y. alpha uses the fourth template type ctype_ax; chax is unused
// in the declaration.
#undef GENTPROT3U12
#define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, kername ) \
\
void PASTEMAC3(cha,chx,chy,kername) \
( \
conj_t conjat, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype_ax* restrict alpha, \
ctype_a* restrict a, inc_t inca, inc_t lda, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict beta, \
ctype_y* restrict y, inc_t incy \
);
INSERT_GENTPROT3U12_BASIC( DOTXF_KERNEL )
// dotxaxpyf kernels
// Aliases for the homogeneous datatype combinations (sss, ddd, ccc, zzz).
#define bli_sssDOTXAXPYF_KERNEL BLIS_SDOTXAXPYF_KERNEL
#define bli_dddDOTXAXPYF_KERNEL BLIS_DDOTXAXPYF_KERNEL
#define bli_cccDOTXAXPYF_KERNEL BLIS_CDOTXAXPYF_KERNEL
#define bli_zzzDOTXAXPYF_KERNEL BLIS_ZDOTXAXPYF_KERNEL
// Prototype template for the dotxaxpyf kernel. It takes four conj_t flags
// (conjat, conja, conjw, conjx). w and x share the type ctype_b; beta, y and
// z share the type ctype_c; alpha uses the fourth template type ctype_ab.
// chab is unused in the declaration.
#undef GENTPROT3U12
#define GENTPROT3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, kername ) \
\
void PASTEMAC3(cha,chb,chc,kername) \
( \
conj_t conjat, \
conj_t conja, \
conj_t conjw, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype_ab* restrict alpha, \
ctype_a* restrict a, inc_t inca, inc_t lda, \
ctype_b* restrict w, inc_t incw, \
ctype_b* restrict x, inc_t incx, \
ctype_c* restrict beta, \
ctype_c* restrict y, inc_t incy, \
ctype_c* restrict z, inc_t incz \
);
INSERT_GENTPROT3U12_BASIC( DOTXAXPYF_KERNEL )
//
// Level-1v
//
// The level-1v aliases carry one, two or three datatype characters depending
// on the operation, and their templates use GENTPROT, GENTPROT2, GENTPROT3
// or GENTPROT3U12 accordingly. Only homogeneous combinations are aliased.
// addv kernels
#define bli_ssADDV_KERNEL BLIS_SADDV_KERNEL
#define bli_ddADDV_KERNEL BLIS_DADDV_KERNEL
#define bli_ccADDV_KERNEL BLIS_CADDV_KERNEL
#define bli_zzADDV_KERNEL BLIS_ZADDV_KERNEL
// Prototype template for the addv kernel: one conj_t flag (for x), the
// length n, and the two vectors x and y with their increments.
#undef GENTPROT2
#define GENTPROT2( ctype_x, ctype_y, chx, chy, kername ) \
\
void PASTEMAC2(chx,chy,kername) \
( \
conj_t conjx, \
dim_t n, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy \
);
INSERT_GENTPROT2_BASIC( ADDV_KERNEL )
// axpyv kernels
// Aliases for the homogeneous datatype combinations (sss, ddd, ccc, zzz).
#define bli_sssAXPYV_KERNEL BLIS_SAXPYV_KERNEL
#define bli_dddAXPYV_KERNEL BLIS_DAXPYV_KERNEL
#define bli_cccAXPYV_KERNEL BLIS_CAXPYV_KERNEL
#define bli_zzzAXPYV_KERNEL BLIS_ZAXPYV_KERNEL
// Prototype template for the axpyv kernel. The first template type and
// character (ctype_a, cha) belong to the scalar alpha; the second and third
// belong to the vectors x and y.
#undef GENTPROT3
#define GENTPROT3( ctype_a, ctype_x, ctype_y, cha, chx, chy, kername ) \
\
void PASTEMAC3(cha,chx,chy,kername) \
( \
conj_t conjx, \
dim_t n, \
ctype_a* restrict alpha, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy \
);
INSERT_GENTPROT3_BASIC( AXPYV_KERNEL )
// copyv kernels
// Aliases for the homogeneous datatype combinations (ss, dd, cc, zz).
#define bli_ssCOPYV_KERNEL BLIS_SCOPYV_KERNEL
#define bli_ddCOPYV_KERNEL BLIS_DCOPYV_KERNEL
#define bli_ccCOPYV_KERNEL BLIS_CCOPYV_KERNEL
#define bli_zzCOPYV_KERNEL BLIS_ZCOPYV_KERNEL
// Prototype template for the copyv kernel; its parameter list is identical
// to the addv template's (conjx, n, x with incx, y with incy).
#undef GENTPROT2
#define GENTPROT2( ctype_x, ctype_y, chx, chy, kername ) \
\
void PASTEMAC2(chx,chy,kername) \
( \
conj_t conjx, \
dim_t n, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy \
);
INSERT_GENTPROT2_BASIC( COPYV_KERNEL )
// dotv kernels
//
// The bli_???DOTV_KERNEL names below alias the BLIS_?DOTV_KERNEL identifiers
// (defined outside this section), one per datatype. GENTPROT3 is then
// redefined as the kernel prototype and passed to INSERT_GENTPROT3_BASIC.
// NOTE(review): PASTEMAC3 and INSERT_GENTPROT3_BASIC are defined elsewhere;
// presumably they build the bli_<chx><chy><chr><kername> name and instantiate
// the prototype for s, d, c and z -- confirm.
// Prototype: returns void; takes conj_t flags for x and y, a dim_t n, the
// restrict-qualified pointers x and y (each followed by its inc_t
// increment), and the pointer rho of type ctype_r (presumably where the
// result is written -- confirm).
#define bli_sssDOTV_KERNEL BLIS_SDOTV_KERNEL
#define bli_dddDOTV_KERNEL BLIS_DDOTV_KERNEL
#define bli_cccDOTV_KERNEL BLIS_CDOTV_KERNEL
#define bli_zzzDOTV_KERNEL BLIS_ZDOTV_KERNEL
#undef GENTPROT3
#define GENTPROT3( ctype_x, ctype_y, ctype_r, chx, chy, chr, kername ) \
\
void PASTEMAC3(chx,chy,chr,kername) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy, \
ctype_r* restrict rho \
);
INSERT_GENTPROT3_BASIC( DOTV_KERNEL )
// dotxv kernels
//
// The bli_???DOTXV_KERNEL names below alias the BLIS_?DOTXV_KERNEL
// identifiers (defined outside this section), one per datatype. GENTPROT3U12
// is then redefined as the kernel prototype and passed to
// INSERT_GENTPROT3U12_BASIC.
// NOTE(review): PASTEMAC3 and INSERT_GENTPROT3U12_BASIC are defined
// elsewhere; presumably they build the bli_<chx><chy><chr><kername> name and
// instantiate the prototype for s, d, c and z -- confirm.
// Prototype: returns void; takes conj_t flags for x and y, a dim_t n, the
// scalar pointer alpha (ctype_xy), the restrict-qualified pointers x and y
// (each followed by its inc_t increment), and the pointers beta and rho,
// both of type ctype_r.
#define bli_sssDOTXV_KERNEL BLIS_SDOTXV_KERNEL
#define bli_dddDOTXV_KERNEL BLIS_DDOTXV_KERNEL
#define bli_cccDOTXV_KERNEL BLIS_CDOTXV_KERNEL
#define bli_zzzDOTXV_KERNEL BLIS_ZDOTXV_KERNEL
#undef GENTPROT3U12
#define GENTPROT3U12( ctype_x, ctype_y, ctype_r, ctype_xy, chx, chy, chr, chxy, kername ) \
\
void PASTEMAC3(chx,chy,chr,kername) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype_xy* restrict alpha, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy, \
ctype_r* restrict beta, \
ctype_r* restrict rho \
);
INSERT_GENTPROT3U12_BASIC( DOTXV_KERNEL )
// invertv kernels
//
// The bli_?INVERTV_KERNEL names below alias the BLIS_?INVERTV_KERNEL
// identifiers (defined outside this section), one per datatype. GENTPROT is
// then redefined as the kernel prototype and passed to INSERT_GENTPROT_BASIC.
// NOTE(review): PASTEMAC and INSERT_GENTPROT_BASIC are defined elsewhere;
// presumably they build the bli_<ch><kername> name and instantiate the
// prototype for s, d, c and z -- confirm.
// Prototype: returns void; takes a dim_t n and the restrict-qualified
// pointer x followed by its inc_t increment.
#define bli_sINVERTV_KERNEL BLIS_SINVERTV_KERNEL
#define bli_dINVERTV_KERNEL BLIS_DINVERTV_KERNEL
#define bli_cINVERTV_KERNEL BLIS_CINVERTV_KERNEL
#define bli_zINVERTV_KERNEL BLIS_ZINVERTV_KERNEL
#undef GENTPROT
#define GENTPROT( ctype, ch, kername ) \
\
void PASTEMAC(ch,kername) \
( \
dim_t n, \
ctype* restrict x, inc_t incx \
);
INSERT_GENTPROT_BASIC( INVERTV_KERNEL )
// scal2v kernels
//
// The bli_???SCAL2V_KERNEL names below alias the BLIS_?SCAL2V_KERNEL
// identifiers (defined outside this section), one per datatype. GENTPROT3 is
// then redefined as the kernel prototype and passed to INSERT_GENTPROT3_BASIC.
// NOTE(review): PASTEMAC3 and INSERT_GENTPROT3_BASIC are defined elsewhere;
// presumably they build the bli_<chb><chx><chy><kername> name and instantiate
// the prototype for s, d, c and z -- confirm.
// Prototype: returns void; takes a conj_t flag for x, a dim_t n, the scalar
// pointer beta, and the restrict-qualified pointers x and y, each followed
// by its inc_t increment.
#define bli_sssSCAL2V_KERNEL BLIS_SSCAL2V_KERNEL
#define bli_dddSCAL2V_KERNEL BLIS_DSCAL2V_KERNEL
#define bli_cccSCAL2V_KERNEL BLIS_CSCAL2V_KERNEL
#define bli_zzzSCAL2V_KERNEL BLIS_ZSCAL2V_KERNEL
#undef GENTPROT3
#define GENTPROT3( ctype_b, ctype_x, ctype_y, chb, chx, chy, kername ) \
\
void PASTEMAC3(chb,chx,chy,kername) \
( \
conj_t conjx, \
dim_t n, \
ctype_b* restrict beta, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy \
);
INSERT_GENTPROT3_BASIC( SCAL2V_KERNEL )
// scalv kernels
//
// The bli_??SCALV_KERNEL names below alias the BLIS_?SCALV_KERNEL identifiers
// (defined outside this section), one per datatype. GENTPROT2 is then
// redefined as the kernel prototype and passed to INSERT_GENTPROT2_BASIC.
// NOTE(review): PASTEMAC2 and INSERT_GENTPROT2_BASIC are defined elsewhere;
// presumably they build the bli_<chb><chx><kername> name and instantiate the
// prototype for s, d, c and z -- confirm.
// Prototype: returns void; takes a conj_t flag for beta (not for x), a dim_t
// n, the scalar pointer beta, and the restrict-qualified pointer x followed
// by its inc_t increment.
#define bli_ssSCALV_KERNEL BLIS_SSCALV_KERNEL
#define bli_ddSCALV_KERNEL BLIS_DSCALV_KERNEL
#define bli_ccSCALV_KERNEL BLIS_CSCALV_KERNEL
#define bli_zzSCALV_KERNEL BLIS_ZSCALV_KERNEL
#undef GENTPROT2
#define GENTPROT2( ctype_b, ctype_x, chb, chx, kername ) \
\
void PASTEMAC2(chb,chx,kername) \
( \
conj_t conjbeta, \
dim_t n, \
ctype_b* restrict beta, \
ctype_x* restrict x, inc_t incx \
);
INSERT_GENTPROT2_BASIC( SCALV_KERNEL )
// setv kernels
//
// The bli_??SETV_KERNEL names below alias the BLIS_?SETV_KERNEL identifiers
// (defined outside this section), one per datatype. GENTPROT2 is then
// redefined as the kernel prototype and passed to INSERT_GENTPROT2_BASIC.
// NOTE(review): PASTEMAC2 and INSERT_GENTPROT2_BASIC are defined elsewhere;
// presumably they build the bli_<chb><chx><kername> name and instantiate the
// prototype for s, d, c and z -- confirm.
// Prototype: returns void; takes a dim_t n, the scalar pointer beta, and the
// restrict-qualified pointer x followed by its inc_t increment. Unlike the
// scalv prototype, there is no conj_t argument.
#define bli_ssSETV_KERNEL BLIS_SSETV_KERNEL
#define bli_ddSETV_KERNEL BLIS_DSETV_KERNEL
#define bli_ccSETV_KERNEL BLIS_CSETV_KERNEL
#define bli_zzSETV_KERNEL BLIS_ZSETV_KERNEL
#undef GENTPROT2
#define GENTPROT2( ctype_b, ctype_x, chb, chx, kername ) \
\
void PASTEMAC2(chb,chx,kername) \
( \
dim_t n, \
ctype_b* restrict beta, \
ctype_x* restrict x, inc_t incx \
);
INSERT_GENTPROT2_BASIC( SETV_KERNEL )
// subv kernels
//
// The bli_??SUBV_KERNEL names below alias the BLIS_?SUBV_KERNEL identifiers
// (defined outside this section), one per datatype. GENTPROT2 is then
// redefined as the kernel prototype and passed to INSERT_GENTPROT2_BASIC.
// NOTE(review): PASTEMAC2 and INSERT_GENTPROT2_BASIC are defined elsewhere;
// presumably they build the bli_<chx><chy><kername> name and instantiate the
// prototype for s, d, c and z -- confirm.
// Prototype: returns void; takes a conj_t flag for x, a dim_t n, and the
// restrict-qualified pointers x and y, each followed by its inc_t increment.
#define bli_ssSUBV_KERNEL BLIS_SSUBV_KERNEL
#define bli_ddSUBV_KERNEL BLIS_DSUBV_KERNEL
#define bli_ccSUBV_KERNEL BLIS_CSUBV_KERNEL
#define bli_zzSUBV_KERNEL BLIS_ZSUBV_KERNEL
#undef GENTPROT2
#define GENTPROT2( ctype_x, ctype_y, chx, chy, kername ) \
\
void PASTEMAC2(chx,chy,kername) \
( \
conj_t conjx, \
dim_t n, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy \
);
INSERT_GENTPROT2_BASIC( SUBV_KERNEL )
// swapv kernels
//
// The bli_??SWAPV_KERNEL names below alias the BLIS_?SWAPV_KERNEL identifiers
// (defined outside this section), one per datatype. GENTPROT2 is then
// redefined as the kernel prototype and passed to INSERT_GENTPROT2_BASIC.
// NOTE(review): PASTEMAC2 and INSERT_GENTPROT2_BASIC are defined elsewhere;
// presumably they build the bli_<chx><chy><kername> name and instantiate the
// prototype for s, d, c and z -- confirm.
// Prototype: returns void; takes a dim_t n and the restrict-qualified
// pointers x and y, each followed by its inc_t increment. There is no conj_t
// argument.
#define bli_ssSWAPV_KERNEL BLIS_SSWAPV_KERNEL
#define bli_ddSWAPV_KERNEL BLIS_DSWAPV_KERNEL
#define bli_ccSWAPV_KERNEL BLIS_CSWAPV_KERNEL
#define bli_zzSWAPV_KERNEL BLIS_ZSWAPV_KERNEL
#undef GENTPROT2
#define GENTPROT2( ctype_x, ctype_y, chx, chy, kername ) \
\
void PASTEMAC2(chx,chy,kername) \
( \
dim_t n, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy \
);
INSERT_GENTPROT2_BASIC( SWAPV_KERNEL )
#endif

View File

@@ -114,6 +114,7 @@
#define BLIS_DEFAULT_MAXR_Z BLIS_DEFAULT_NR_Z
#endif
// Next, we define the dimensions of the pool blocks for each datatype.
//
@@ -156,6 +157,47 @@
#define BLIS_POOL_KC_Z ( ( BLIS_MAXIMUM_KC_Z * BLIS_PACKDIM_KR_Z ) \
/ BLIS_DEFAULT_KR_Z )
//
// Pool dimensions for single complex (4m).
//
// Each BLIS_MAXIMUM_4M_*_C value is multiplied by a BLIS_PACKDIM_* value and
// then divided by the matching BLIS_DEFAULT_* value. Note that the
// single-precision real (_S) MAXR/KR macros are used here, not the _C ones.
//
#define BLIS_POOL_4M_MC_C \
    ( ( BLIS_MAXIMUM_4M_MC_C * BLIS_PACKDIM_MAXR_S ) / BLIS_DEFAULT_MAXR_S )
#define BLIS_POOL_4M_NC_C \
    ( ( BLIS_MAXIMUM_4M_NC_C * BLIS_PACKDIM_MAXR_S ) / BLIS_DEFAULT_MAXR_S )
#define BLIS_POOL_4M_KC_C \
    ( ( BLIS_MAXIMUM_4M_KC_C * BLIS_PACKDIM_KR_S ) / BLIS_DEFAULT_KR_S )
//
// Pool dimensions for double complex (4m).
//
// Each BLIS_MAXIMUM_4M_*_Z value is multiplied by a BLIS_PACKDIM_* value and
// then divided by the matching BLIS_DEFAULT_* value. Note that the
// double-precision real (_D) MAXR/KR macros are used here, not the _Z ones.
//
#define BLIS_POOL_4M_MC_Z \
    ( ( BLIS_MAXIMUM_4M_MC_Z * BLIS_PACKDIM_MAXR_D ) / BLIS_DEFAULT_MAXR_D )
#define BLIS_POOL_4M_NC_Z \
    ( ( BLIS_MAXIMUM_4M_NC_Z * BLIS_PACKDIM_MAXR_D ) / BLIS_DEFAULT_MAXR_D )
#define BLIS_POOL_4M_KC_Z \
    ( ( BLIS_MAXIMUM_4M_KC_Z * BLIS_PACKDIM_KR_D ) / BLIS_DEFAULT_KR_D )
//
// Pool dimensions for single complex (3m).
//
// Each BLIS_MAXIMUM_3M_*_C value is multiplied by a BLIS_PACKDIM_* value and
// then divided by the matching BLIS_DEFAULT_* value. Note that the
// single-precision real (_S) MAXR/KR macros are used here, not the _C ones.
//
#define BLIS_POOL_3M_MC_C \
    ( ( BLIS_MAXIMUM_3M_MC_C * BLIS_PACKDIM_MAXR_S ) / BLIS_DEFAULT_MAXR_S )
#define BLIS_POOL_3M_NC_C \
    ( ( BLIS_MAXIMUM_3M_NC_C * BLIS_PACKDIM_MAXR_S ) / BLIS_DEFAULT_MAXR_S )
#define BLIS_POOL_3M_KC_C \
    ( ( BLIS_MAXIMUM_3M_KC_C * BLIS_PACKDIM_KR_S ) / BLIS_DEFAULT_KR_S )
//
// Pool dimensions for double complex (3m).
//
// Each BLIS_MAXIMUM_3M_*_Z value is multiplied by a BLIS_PACKDIM_* value and
// then divided by the matching BLIS_DEFAULT_* value. Note that the
// double-precision real (_D) MAXR/KR macros are used here, not the _Z ones.
//
#define BLIS_POOL_3M_MC_Z \
    ( ( BLIS_MAXIMUM_3M_MC_Z * BLIS_PACKDIM_MAXR_D ) / BLIS_DEFAULT_MAXR_D )
#define BLIS_POOL_3M_NC_Z \
    ( ( BLIS_MAXIMUM_3M_NC_Z * BLIS_PACKDIM_MAXR_D ) / BLIS_DEFAULT_MAXR_D )
#define BLIS_POOL_3M_KC_Z \
    ( ( BLIS_MAXIMUM_3M_KC_Z * BLIS_PACKDIM_KR_D ) / BLIS_DEFAULT_KR_D )
// Now, we compute the size of each block/panel of A, B, and C for each
// datatype.
@@ -168,19 +210,12 @@
//
// Compute memory pool block sizes for single real.
//
#define BLIS_MK_BLOCK_SIZE_S ( BLIS_POOL_MC_S * \
( BLIS_POOL_KC_S + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_S \
) \
) * \
BLIS_POOL_KC_S * \
BLIS_SIZEOF_S \
)
#define BLIS_KN_BLOCK_SIZE_S ( ( BLIS_POOL_KC_S + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_S \
) \
) * \
#define BLIS_KN_BLOCK_SIZE_S ( BLIS_POOL_KC_S * \
BLIS_POOL_NC_S * \
BLIS_SIZEOF_S \
)
@@ -192,19 +227,12 @@
//
// Compute memory pool block sizes for double real.
//
#define BLIS_MK_BLOCK_SIZE_D ( BLIS_POOL_MC_D * \
( BLIS_POOL_KC_D + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_D \
) \
) * \
BLIS_POOL_KC_D * \
BLIS_SIZEOF_D \
)
#define BLIS_KN_BLOCK_SIZE_D ( ( BLIS_POOL_KC_D + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_D \
) \
) * \
#define BLIS_KN_BLOCK_SIZE_D ( BLIS_POOL_KC_D * \
BLIS_POOL_NC_D * \
BLIS_SIZEOF_D \
)
@@ -216,19 +244,12 @@
//
// Compute memory pool block sizes for single complex.
//
#define BLIS_MK_BLOCK_SIZE_C ( BLIS_POOL_MC_C * \
( BLIS_POOL_KC_C + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_C \
) \
) * \
BLIS_POOL_KC_C * \
BLIS_SIZEOF_C \
)
#define BLIS_KN_BLOCK_SIZE_C ( ( BLIS_POOL_KC_C + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_C \
) \
) * \
#define BLIS_KN_BLOCK_SIZE_C ( BLIS_POOL_KC_C * \
BLIS_POOL_NC_C * \
BLIS_SIZEOF_C \
)
@@ -238,21 +259,14 @@
)
//
// Compute memory pool block sizes for single complex.
// Compute memory pool block sizes for double complex.
//
#define BLIS_MK_BLOCK_SIZE_Z ( BLIS_POOL_MC_Z * \
( BLIS_POOL_KC_Z + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_Z \
) \
) * \
BLIS_POOL_KC_Z * \
BLIS_SIZEOF_Z \
)
#define BLIS_KN_BLOCK_SIZE_Z ( ( BLIS_POOL_KC_Z + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_Z \
) \
) * \
#define BLIS_KN_BLOCK_SIZE_Z ( BLIS_POOL_KC_Z * \
BLIS_POOL_NC_Z * \
BLIS_SIZEOF_Z \
)
@@ -261,6 +275,90 @@
BLIS_SIZEOF_Z \
)
//
// Memory pool block sizes for single complex (4m).
//
// Each size is the product of two 4m pool dimensions and BLIS_SIZEOF_C:
// MK uses MC x KC, KN uses KC x NC, and MN uses MC x NC.
//
#define BLIS_MK_BLOCK_SIZE_4M_C \
    ( BLIS_POOL_4M_MC_C * BLIS_POOL_4M_KC_C * BLIS_SIZEOF_C )
#define BLIS_KN_BLOCK_SIZE_4M_C \
    ( BLIS_POOL_4M_KC_C * BLIS_POOL_4M_NC_C * BLIS_SIZEOF_C )
#define BLIS_MN_BLOCK_SIZE_4M_C \
    ( BLIS_POOL_4M_MC_C * BLIS_POOL_4M_NC_C * BLIS_SIZEOF_C )
//
// Memory pool block sizes for double complex (4m).
//
// Each size is the product of two 4m pool dimensions and BLIS_SIZEOF_Z:
// MK uses MC x KC, KN uses KC x NC, and MN uses MC x NC.
//
#define BLIS_MK_BLOCK_SIZE_4M_Z \
    ( BLIS_POOL_4M_MC_Z * BLIS_POOL_4M_KC_Z * BLIS_SIZEOF_Z )
#define BLIS_KN_BLOCK_SIZE_4M_Z \
    ( BLIS_POOL_4M_KC_Z * BLIS_POOL_4M_NC_Z * BLIS_SIZEOF_Z )
#define BLIS_MN_BLOCK_SIZE_4M_Z \
    ( BLIS_POOL_4M_MC_Z * BLIS_POOL_4M_NC_Z * BLIS_SIZEOF_Z )
//
// Memory pool block sizes for single complex (3m).
//
// NOTE: We scale by 3/2 because 3m requires 50% more space than 4m.
// The whole product (two pool dimensions times BLIS_SIZEOF_C times 3) is
// formed first and only then divided by 2, so the integer division applies
// to the full product rather than to BLIS_SIZEOF_C * 3 alone.
//
#define BLIS_MK_BLOCK_SIZE_3M_C \
    ( ( BLIS_POOL_3M_MC_C * BLIS_POOL_3M_KC_C * ( BLIS_SIZEOF_C * 3 ) ) / 2 )
#define BLIS_KN_BLOCK_SIZE_3M_C \
    ( ( BLIS_POOL_3M_KC_C * BLIS_POOL_3M_NC_C * ( BLIS_SIZEOF_C * 3 ) ) / 2 )
#define BLIS_MN_BLOCK_SIZE_3M_C \
    ( ( BLIS_POOL_3M_MC_C * BLIS_POOL_3M_NC_C * ( BLIS_SIZEOF_C * 3 ) ) / 2 )
//
// Memory pool block sizes for double complex (3m).
//
// NOTE: We scale by 3/2 because 3m requires 50% more space than 4m.
// The whole product (two pool dimensions times BLIS_SIZEOF_Z times 3) is
// formed first and only then divided by 2, so the integer division applies
// to the full product rather than to BLIS_SIZEOF_Z * 3 alone.
//
#define BLIS_MK_BLOCK_SIZE_3M_Z \
    ( ( BLIS_POOL_3M_MC_Z * BLIS_POOL_3M_KC_Z * ( BLIS_SIZEOF_Z * 3 ) ) / 2 )
#define BLIS_KN_BLOCK_SIZE_3M_Z \
    ( ( BLIS_POOL_3M_KC_Z * BLIS_POOL_3M_NC_Z * ( BLIS_SIZEOF_Z * 3 ) ) / 2 )
#define BLIS_MN_BLOCK_SIZE_3M_Z \
    ( ( BLIS_POOL_3M_MC_Z * BLIS_POOL_3M_NC_Z * ( BLIS_SIZEOF_Z * 3 ) ) / 2 )
// -- Maximum block size search ------------------------------------------------
@@ -283,6 +381,22 @@
#undef BLIS_MK_BLOCK_SIZE
#define BLIS_MK_BLOCK_SIZE BLIS_MK_BLOCK_SIZE_Z
#endif
// Fold the 4m and 3m candidates into the running maximum. Whenever a
// candidate exceeds the current BLIS_MK_BLOCK_SIZE, the macro is undefined
// and redefined to that candidate. The tests run in order, so each one
// compares against the largest value selected so far.
#if BLIS_MK_BLOCK_SIZE_4M_C > BLIS_MK_BLOCK_SIZE
#undef BLIS_MK_BLOCK_SIZE
#define BLIS_MK_BLOCK_SIZE BLIS_MK_BLOCK_SIZE_4M_C
#endif
#if BLIS_MK_BLOCK_SIZE_4M_Z > BLIS_MK_BLOCK_SIZE
#undef BLIS_MK_BLOCK_SIZE
#define BLIS_MK_BLOCK_SIZE BLIS_MK_BLOCK_SIZE_4M_Z
#endif
#if BLIS_MK_BLOCK_SIZE_3M_C > BLIS_MK_BLOCK_SIZE
#undef BLIS_MK_BLOCK_SIZE
#define BLIS_MK_BLOCK_SIZE BLIS_MK_BLOCK_SIZE_3M_C
#endif
#if BLIS_MK_BLOCK_SIZE_3M_Z > BLIS_MK_BLOCK_SIZE
#undef BLIS_MK_BLOCK_SIZE
#define BLIS_MK_BLOCK_SIZE BLIS_MK_BLOCK_SIZE_3M_Z
#endif
//
// Find the largest block size for panels of B.
@@ -300,6 +414,22 @@
#undef BLIS_KN_BLOCK_SIZE
#define BLIS_KN_BLOCK_SIZE BLIS_KN_BLOCK_SIZE_Z
#endif
// Fold the 4m and 3m candidates into the running maximum. Whenever a
// candidate exceeds the current BLIS_KN_BLOCK_SIZE, the macro is undefined
// and redefined to that candidate. The tests run in order, so each one
// compares against the largest value selected so far.
#if BLIS_KN_BLOCK_SIZE_4M_C > BLIS_KN_BLOCK_SIZE
#undef BLIS_KN_BLOCK_SIZE
#define BLIS_KN_BLOCK_SIZE BLIS_KN_BLOCK_SIZE_4M_C
#endif
#if BLIS_KN_BLOCK_SIZE_4M_Z > BLIS_KN_BLOCK_SIZE
#undef BLIS_KN_BLOCK_SIZE
#define BLIS_KN_BLOCK_SIZE BLIS_KN_BLOCK_SIZE_4M_Z
#endif
#if BLIS_KN_BLOCK_SIZE_3M_C > BLIS_KN_BLOCK_SIZE
#undef BLIS_KN_BLOCK_SIZE
#define BLIS_KN_BLOCK_SIZE BLIS_KN_BLOCK_SIZE_3M_C
#endif
#if BLIS_KN_BLOCK_SIZE_3M_Z > BLIS_KN_BLOCK_SIZE
#undef BLIS_KN_BLOCK_SIZE
#define BLIS_KN_BLOCK_SIZE BLIS_KN_BLOCK_SIZE_3M_Z
#endif
//
// Find the largest block size for panels of C.
@@ -317,6 +447,22 @@
#undef BLIS_MN_BLOCK_SIZE
#define BLIS_MN_BLOCK_SIZE BLIS_MN_BLOCK_SIZE_Z
#endif
// Fold the 4m and 3m candidates into the running maximum. Whenever a
// candidate exceeds the current BLIS_MN_BLOCK_SIZE, the macro is undefined
// and redefined to that candidate. The tests run in order, so each one
// compares against the largest value selected so far.
#if BLIS_MN_BLOCK_SIZE_4M_C > BLIS_MN_BLOCK_SIZE
#undef BLIS_MN_BLOCK_SIZE
#define BLIS_MN_BLOCK_SIZE BLIS_MN_BLOCK_SIZE_4M_C
#endif
#if BLIS_MN_BLOCK_SIZE_4M_Z > BLIS_MN_BLOCK_SIZE
#undef BLIS_MN_BLOCK_SIZE
#define BLIS_MN_BLOCK_SIZE BLIS_MN_BLOCK_SIZE_4M_Z
#endif
#if BLIS_MN_BLOCK_SIZE_3M_C > BLIS_MN_BLOCK_SIZE
#undef BLIS_MN_BLOCK_SIZE
#define BLIS_MN_BLOCK_SIZE BLIS_MN_BLOCK_SIZE_3M_C
#endif
#if BLIS_MN_BLOCK_SIZE_3M_Z > BLIS_MN_BLOCK_SIZE
#undef BLIS_MN_BLOCK_SIZE
#define BLIS_MN_BLOCK_SIZE BLIS_MN_BLOCK_SIZE_3M_Z
#endif
// -- Compute pool sizes -------------------------------------------------------

View File

@@ -232,9 +232,9 @@
(obj).info = ( (obj).info & ~BLIS_TRANS_BIT ) | (trans); \
}
#define bli_obj_set_conj( conj, obj ) \
#define bli_obj_set_conj( conjval, obj ) \
{ \
(obj).info = ( (obj).info & ~BLIS_CONJ_BIT ) | (conj); \
(obj).info = ( (obj).info & ~BLIS_CONJ_BIT ) | (conjval); \
}
#define bli_obj_set_uplo( uplo, obj ) \
@@ -329,9 +329,9 @@
(obj).info = ( (obj).info ^ (trans) ); \
}
#define bli_obj_apply_conj( conj, obj )\
#define bli_obj_apply_conj( conjval, obj )\
{ \
(obj).info = ( (obj).info ^ (conj) ); \
(obj).info = ( (obj).info ^ (conjval) ); \
}
@@ -842,10 +842,10 @@ bli_obj_width_stored( obj )
// Create an alias with a conj value applied.
#define bli_obj_alias_with_conj( conj, a, b ) \
#define bli_obj_alias_with_conj( conja, a, b ) \
{ \
bli_obj_alias_to( a, b ); \
bli_obj_apply_conj( conj, b ); \
bli_obj_apply_conj( conja, b ); \
}
@@ -878,6 +878,19 @@ bli_obj_width_stored( obj )
bli_obj_pack_status( obj ) == BLIS_PACKED_COL_PANELS )
// Check if an object is packed for 4m/3m
//
// Each macro evaluates to non-zero when bli_obj_pack_status( obj ) equals
// either the row-panel or the column-panel value for the given method
// (4m or 3m), and to zero otherwise. Note that obj appears twice in the
// expansion, so an argument with side effects may be evaluated twice.
#define bli_obj_is_panel_packed_4m( obj ) \
\
( bli_obj_pack_status( obj ) == BLIS_PACKED_ROW_PANELS_4M || \
bli_obj_pack_status( obj ) == BLIS_PACKED_COL_PANELS_4M )
#define bli_obj_is_panel_packed_3m( obj ) \
\
( bli_obj_pack_status( obj ) == BLIS_PACKED_ROW_PANELS_3M || \
bli_obj_pack_status( obj ) == BLIS_PACKED_COL_PANELS_3M )
// Release object's pack (and cast) memory entries back to memory manager
#define bli_obj_release_pack( obj_p ) \

View File

@@ -199,25 +199,25 @@
// conj
#define bli_is_noconj( conj ) \
#define bli_is_noconj( conjval ) \
\
( conj == BLIS_NO_CONJUGATE )
( conjval == BLIS_NO_CONJUGATE )
#define bli_is_conj( conj ) \
#define bli_is_conj( conjval ) \
\
( conj == BLIS_CONJUGATE )
( conjval == BLIS_CONJUGATE )
#define bli_conj_toggled( conj ) \
#define bli_conj_toggled( conjval ) \
\
( conj ^ BLIS_CONJ_BIT )
( conjval ^ BLIS_CONJ_BIT )
#define bli_apply_conj( conjapp, conj )\
#define bli_apply_conj( conjapp, conjval )\
\
( conj ^ (conjapp) )
( conjval ^ (conjapp) )
#define bli_toggle_conj( conj ) \
#define bli_toggle_conj( conjval ) \
{ \
conj = bli_conj_toggled( conj ); \
conjval = bli_conj_toggled( conjval ); \
}

View File

@@ -83,6 +83,9 @@
#include "bli_addjris.h"
#include "bli_addjs.h"
#include "bli_add3ris.h"
#include "bli_add3s.h"
#include "bli_axpyris.h"
#include "bli_axpys.h"
#include "bli_axpyjris.h"
@@ -103,6 +106,9 @@
#include "bli_copycjris.h"
#include "bli_copycjs.h"
#include "bli_copyri3s.h"
#include "bli_copyjri3s.h"
#include "bli_dots.h"
#include "bli_dotjs.h"
@@ -129,6 +135,16 @@
#include "bli_scal2jris.h"
#include "bli_scal2js.h"
#include "bli_scal2ri3s.h"
#include "bli_scal2jri3s.h"
#include "bli_set0ris.h"
#include "bli_set0s.h"
#include "bli_set1s.h"
#include "bli_seti0s.h"
#include "bli_sqrt2ris.h"
#include "bli_sqrt2s.h"
@@ -164,6 +180,8 @@
#include "bli_xpbys_mxn.h"
#include "bli_xpbys_mxn_uplo.h"
#include "bli_scalris_mxn_uplo.h"
// -- Miscellaneous macros --

View File

@@ -244,6 +244,10 @@ typedef struct
#define BLIS_BITVAL_PACKED_COLUMNS 0x30000
#define BLIS_BITVAL_PACKED_ROW_PANELS 0x40000
#define BLIS_BITVAL_PACKED_COL_PANELS 0x50000
// Bit values for the 4m and 3m panel-packed formats; these back the
// BLIS_PACKED_{ROW,COL}_PANELS_{4M,3M} members of pack_t.
#define BLIS_BITVAL_PACKED_ROW_PANELS_4M 0x60000
#define BLIS_BITVAL_PACKED_COL_PANELS_4M 0x70000
#define BLIS_BITVAL_PACKED_ROW_PANELS_3M 0x80000
#define BLIS_BITVAL_PACKED_COL_PANELS_3M 0x90000
#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0
#define BLIS_BITVAL_PACK_REV_IF_UPPER 0x100000
#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0
@@ -346,13 +350,17 @@ typedef enum
typedef enum
{
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
BLIS_PACKED_ROW_PANELS_4M = BLIS_BITVAL_PACKED_ROW_PANELS_4M,
BLIS_PACKED_COL_PANELS_4M = BLIS_BITVAL_PACKED_COL_PANELS_4M,
BLIS_PACKED_ROW_PANELS_3M = BLIS_BITVAL_PACKED_ROW_PANELS_3M,
BLIS_PACKED_COL_PANELS_3M = BLIS_BITVAL_PACKED_COL_PANELS_3M,
} pack_t;
@@ -460,10 +468,10 @@ typedef struct func_s
- 3 == packed by columns
- 4 == packed by row panels
- 5 == packed by column panels
- 6 == unused
- 7 == unused
- 8 == unused
- 9 == unused
- 6 == packed by row panels (4m)
- 7 == packed by column panels (4m)
- 8 == packed by row panels (3m)
- 9 == packed by column panels (3m)
20 Packed panel order if upper-stored
- 0 == forward order if upper
- 1 == reverse order if upper

View File

@@ -78,8 +78,12 @@ extern "C" {
#include "bli_kernel.h"
#include "bli_kernel_type_defs.h"
#include "bli_kernel_pre_macro_defs.h"
#include "bli_kernel_macro_defs.h"
#include "bli_kernel_4m_macro_defs.h"
#include "bli_kernel_3m_macro_defs.h"
#include "bli_kernel_post_macro_defs.h"
#include "bli_kernel_prototypes.h"
// -- BLIS memory pool definitions --

View File

@@ -41,6 +41,8 @@
// - The first char encodes the type of x.
// - The second char encodes the type of a.
#ifndef BLIS_ENABLE_C99_COMPLEX
#define bli_ssabval2s( x, a ) bli_sabval2ris( bli_sreal(x), bli_simag(x), bli_sreal(a), bli_simag(a) )
#define bli_dsabval2s( x, a ) bli_sabval2ris( bli_dreal(x), bli_dimag(x), bli_sreal(a), bli_simag(a) )
#define bli_csabval2s( x, a ) bli_sabval2ris( bli_creal(x), bli_cimag(x), bli_sreal(a), bli_simag(a) )
@@ -51,8 +53,6 @@
#define bli_cdabval2s( x, a ) bli_dabval2ris( bli_creal(x), bli_cimag(x), bli_dreal(a), bli_dimag(a) )
#define bli_zdabval2s( x, a ) bli_dabval2ris( bli_zreal(x), bli_zimag(x), bli_dreal(a), bli_dimag(a) )
#ifndef BLIS_ENABLE_C99_COMPLEX
#define bli_scabval2s( x, a ) bli_cabval2ris( bli_sreal(x), bli_simag(x), bli_creal(a), bli_cimag(a) )
#define bli_dcabval2s( x, a ) bli_cabval2ris( bli_dreal(x), bli_dimag(x), bli_creal(a), bli_cimag(a) )
#define bli_ccabval2s( x, a ) bli_cabval2ris( bli_creal(x), bli_cimag(x), bli_creal(a), bli_cimag(a) )

View File

@@ -0,0 +1,192 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_ADD3S_H
#define BLIS_ADD3S_H
// add3s
// Notes:
// - The first char encodes the type of a.
// - The second char encodes the type of b.
// - The third char encodes the type of c.
// -- (abc) = (??s) ------------------------------------------------------------
// c is single-precision real: every combination of a and b types is routed to
// bli_sadd3ris, which receives the real and imaginary components of each
// operand as separate arguments.
#define bli_sssadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) )
#define bli_dssadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) )
#define bli_cssadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) )
#define bli_zssadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) )
#define bli_sdsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) )
#define bli_ddsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) )
#define bli_cdsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) )
#define bli_zdsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) )
#define bli_scsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) )
#define bli_dcsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) )
#define bli_ccsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) )
#define bli_zcsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) )
#define bli_szsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) )
#define bli_dzsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) )
#define bli_czsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) )
#define bli_zzsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) )
// -- (abc) = (??d) ------------------------------------------------------------
// c is double-precision real: every combination of a and b types is routed to
// bli_dadd3ris, which receives the real and imaginary components of each
// operand as separate arguments.
#define bli_ssdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) )
#define bli_dsdadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) )
#define bli_csdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) )
#define bli_zsdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) )
#define bli_sddadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) )
#define bli_dddadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) )
#define bli_cddadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) )
#define bli_zddadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) )
#define bli_scdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) )
#define bli_dcdadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) )
#define bli_ccdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) )
#define bli_zcdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) )
#define bli_szdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) )
#define bli_dzdadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) )
#define bli_czdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) )
#define bli_zzdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) )
#ifndef BLIS_ENABLE_C99_COMPLEX
// -- (abc) = (??c) ------------------------------------------------------------
// c is single-precision complex (non-C99 representation). When both a and b
// are real (ssc, dsc, sdc, ddc) the real-domain bli_sadd3ris is used; every
// other combination uses bli_cadd3ris.
// NOTE(review): bli_sadd3ris is defined elsewhere. If it writes only the real
// component, the imaginary part of c is left untouched in those four cases,
// whereas the C99 branch assigns the whole value of c -- confirm this
// difference is intended.
#define bli_sscadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) )
#define bli_dscadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) )
#define bli_cscadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) )
#define bli_zscadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) )
#define bli_sdcadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) )
#define bli_ddcadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) )
#define bli_cdcadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) )
#define bli_zdcadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) )
#define bli_sccadd3s( a, b, c ) bli_cadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) )
#define bli_dccadd3s( a, b, c ) bli_cadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) )
#define bli_cccadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) )
#define bli_zccadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) )
#define bli_szcadd3s( a, b, c ) bli_cadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) )
#define bli_dzcadd3s( a, b, c ) bli_cadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) )
#define bli_czcadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) )
#define bli_zzcadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) )
// -- (abc) = (??z) ------------------------------------------------------------
// c is double-precision complex (non-C99 representation). When both a and b
// are real (ssz, dsz, sdz, ddz) the real-domain bli_dadd3ris is used; every
// other combination uses bli_zadd3ris.
// NOTE(review): bli_dadd3ris is defined elsewhere. If it writes only the real
// component, the imaginary part of c is left untouched in those four cases,
// whereas the C99 branch assigns the whole value of c -- confirm this
// difference is intended.
#define bli_sszadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) )
#define bli_dszadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) )
#define bli_cszadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) )
#define bli_zszadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) )
#define bli_sdzadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) )
#define bli_ddzadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) )
#define bli_cdzadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) )
#define bli_zdzadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) )
#define bli_sczadd3s( a, b, c ) bli_zadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) )
#define bli_dczadd3s( a, b, c ) bli_zadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) )
#define bli_cczadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) )
#define bli_zczadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) )
#define bli_szzadd3s( a, b, c ) bli_zadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) )
#define bli_dzzadd3s( a, b, c ) bli_zadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) )
#define bli_czzadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) )
#define bli_zzzadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) )
#else // ifdef BLIS_ENABLE_C99_COMPLEX
// -- (abc) = (??c) ------------------------------------------------------------
// C99 complex build: c is single-precision complex. Every type combination
// expands to the same braced statement that assigns (a) + (b) to (c), leaving
// any type conversion to the compiler.
#define bli_sscadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_dscadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_cscadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_zscadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_sdcadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_ddcadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_cdcadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_zdcadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_sccadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_dccadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_cccadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_zccadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_szcadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_dzcadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_czcadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_zzcadd3s( a, b, c ) { (c) = (a) + (b); }
// -- (abc) = (??z) ------------------------------------------------------------
// C99 complex build: c is double-precision complex. Every type combination
// expands to the same braced statement that assigns (a) + (b) to (c), leaving
// any type conversion to the compiler.
#define bli_sszadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_dszadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_cszadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_zszadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_sdzadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_ddzadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_cdzadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_zdzadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_sczadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_dczadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_cczadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_zczadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_szzadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_dzzadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_czzadd3s( a, b, c ) { (c) = (a) + (b); }
#define bli_zzzadd3s( a, b, c ) { (c) = (a) + (b); }
#endif // BLIS_ENABLE_C99_COMPLEX
// -- Homogeneous shorthands ---------------------------------------------------
// Single-character forms for the case where a, b and c all share one
// datatype; each forwards to the matching three-character macro.
#define bli_sadd3s( a, b, c ) bli_sssadd3s( a, b, c )
#define bli_dadd3s( a, b, c ) bli_dddadd3s( a, b, c )
#define bli_cadd3s( a, b, c ) bli_cccadd3s( a, b, c )
#define bli_zadd3s( a, b, c ) bli_zzzadd3s( a, b, c )
#endif

View File

@@ -148,28 +148,5 @@
( ( gint_t* ) bli_obj_buffer_for_const( BLIS_INT, BLIS_MINUS_TWO ) )
// set to constant
// set1s
#define bli_sset1s( a ) bli_ssets( 1.0F, 0.0F, (a) )
#define bli_dset1s( a ) bli_dsets( 1.0 , 0.0 , (a) )
#define bli_cset1s( a ) bli_csets( 1.0F, 0.0F, (a) )
#define bli_zset1s( a ) bli_zsets( 1.0 , 0.0 , (a) )
// set0s
#define bli_sset0s( a ) bli_ssets( 0.0F, 0.0F, (a) )
#define bli_dset0s( a ) bli_dsets( 0.0 , 0.0 , (a) )
#define bli_cset0s( a ) bli_csets( 0.0F, 0.0F, (a) )
#define bli_zset0s( a ) bli_zsets( 0.0 , 0.0 , (a) )
// seti0s
#define bli_sseti0s( a ) bli_ssetis( 0.0F, (a) )
#define bli_dseti0s( a ) bli_dsetis( 0.0 , (a) )
#define bli_cseti0s( a ) bli_csetis( 0.0F, (a) )
#define bli_zseti0s( a ) bli_zsetis( 0.0 , (a) )
#endif

View File

@@ -41,51 +41,51 @@
// - The first char encodes the type of x.
// - The second char encodes the type of y.
// - The first macro argument is the conjugation flag; it is forwarded to
//   bli_?copycjris, or tested with bli_is_conj() in the C99 complex branch.
//
// NOTE(review): every macro in this span is defined twice -- first with the
// flag parameter spelled `conj`, then with it spelled `conjx`; the replacement
// lists are otherwise the same. This looks like the before/after lines of a
// parameter rename; confirm that only the `conjx` set is meant to remain.

// Destination y is real single precision: forward the real/imaginary parts of
// x and y to bli_scopycjris.
#define bli_sscopycjs( conj, x, y ) bli_scopycjris( conj, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
#define bli_dscopycjs( conj, x, y ) bli_scopycjris( conj, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
#define bli_cscopycjs( conj, x, y ) bli_scopycjris( conj, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
#define bli_zscopycjs( conj, x, y ) bli_scopycjris( conj, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )
#define bli_sscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
#define bli_dscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
#define bli_cscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
#define bli_zscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )
// Destination y is real double precision: forward to bli_dcopycjris.
#define bli_sdcopycjs( conj, x, y ) bli_dcopycjris( conj, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
#define bli_ddcopycjs( conj, x, y ) bli_dcopycjris( conj, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_cdcopycjs( conj, x, y ) bli_dcopycjris( conj, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_zdcopycjs( conj, x, y ) bli_dcopycjris( conj, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_sdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
#define bli_ddcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_cdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_zdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )
// Complex destinations have two implementations, selected by
// BLIS_ENABLE_C99_COMPLEX. Without it, the real/imaginary parts are forwarded
// to bli_ccopycjris / bli_zcopycjris, exactly as for the real destinations.
#ifndef BLIS_ENABLE_C99_COMPLEX
#define bli_sccopycjs( conj, x, y ) bli_ccopycjris( conj, bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) )
#define bli_dccopycjs( conj, x, y ) bli_ccopycjris( conj, bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
#define bli_cccopycjs( conj, x, y ) bli_ccopycjris( conj, bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
#define bli_zccopycjs( conj, x, y ) bli_ccopycjris( conj, bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )
#define bli_sccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) )
#define bli_dccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
#define bli_cccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
#define bli_zccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )
#define bli_szcopycjs( conj, x, y ) bli_zcopycjris( conj, bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_dzcopycjs( conj, x, y ) bli_zcopycjris( conj, bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_czcopycjs( conj, x, y ) bli_zcopycjris( conj, bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zzcopycjs( conj, x, y ) bli_zcopycjris( conj, bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_szcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_dzcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_czcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zzcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
#else // ifdef BLIS_ENABLE_C99_COMPLEX
// With C99 complex types the copy is a plain assignment. A real source (s, d)
// ignores the flag; a complex source (c, z) is passed through conjf()/conj()
// when bli_is_conj() is true for the flag.
// NOTE(review): in the variants whose parameter is named `conj`, the token
// `conj` in `conj (x)` is the macro parameter, so it is replaced by the
// caller's flag argument instead of naming the <complex.h> function. The
// `conjx` spelling does not have this capture.
#define bli_sccopycjs( conj, x, y ) { (y) = (x); }
#define bli_dccopycjs( conj, x, y ) { (y) = (x); }
#define bli_cccopycjs( conj, x, y ) { (y) = ( bli_is_conj( conj ) ? conjf(x) : (x) ); }
#define bli_zccopycjs( conj, x, y ) { (y) = ( bli_is_conj( conj ) ? conj (x) : (x) ); }
#define bli_sccopycjs( conjx, x, y ) { (y) = (x); }
#define bli_dccopycjs( conjx, x, y ) { (y) = (x); }
#define bli_cccopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conjf(x) : (x) ); }
#define bli_zccopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conj (x) : (x) ); }
#define bli_szcopycjs( conj, x, y ) { (y) = (x); }
#define bli_dzcopycjs( conj, x, y ) { (y) = (x); }
#define bli_czcopycjs( conj, x, y ) { (y) = ( bli_is_conj( conj ) ? conjf(x) : (x) ); }
#define bli_zzcopycjs( conj, x, y ) { (y) = ( bli_is_conj( conj ) ? conj (x) : (x) ); }
#define bli_szcopycjs( conjx, x, y ) { (y) = (x); }
#define bli_dzcopycjs( conjx, x, y ) { (y) = (x); }
#define bli_czcopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conjf(x) : (x) ); }
#define bli_zzcopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conj (x) : (x) ); }
#endif // BLIS_ENABLE_C99_COMPLEX
// Integer copy: the flag is ignored and x is cast to gint_t before assignment.
#define bli_iicopycjs( conj, x, y ) { (y) = ( gint_t ) (x); }
#define bli_iicopycjs( conjx, x, y ) { (y) = ( gint_t ) (x); }
// Same-type shorthands: bli_?copycjs forwards to the two-letter macro whose
// x and y type characters are equal.
#define bli_scopycjs( conj, x, y ) bli_sscopycjs( conj, x, y )
#define bli_dcopycjs( conj, x, y ) bli_ddcopycjs( conj, x, y )
#define bli_ccopycjs( conj, x, y ) bli_cccopycjs( conj, x, y )
#define bli_zcopycjs( conj, x, y ) bli_zzcopycjs( conj, x, y )
#define bli_icopycjs( conj, x, y ) bli_iicopycjs( conj, x, y )
#define bli_scopycjs( conjx, x, y ) bli_sscopycjs( conjx, x, y )
#define bli_dcopycjs( conjx, x, y ) bli_ddcopycjs( conjx, x, y )
#define bli_ccopycjs( conjx, x, y ) bli_cccopycjs( conjx, x, y )
#define bli_zcopycjs( conjx, x, y ) bli_zzcopycjs( conjx, x, y )
#define bli_icopycjs( conjx, x, y ) bli_iicopycjs( conjx, x, y )
#endif

View File

@@ -41,47 +41,47 @@
// - The first char encodes the type of x.
// - The second char encodes the type of y.
// - The first macro argument is the conjugation flag; it is forwarded to
//   bli_?scalcjris, or tested with bli_is_conj() in the C99 complex branch.
//
// NOTE(review): every macro in this span is defined twice -- first with the
// flag parameter spelled `conj`, then with it spelled `conjx`; the replacement
// lists are otherwise the same. This looks like the before/after lines of a
// parameter rename; confirm that only the `conjx` set is meant to remain.

// Destination y is real single precision: forward the real/imaginary parts of
// x and y to bli_sscalcjris.
#define bli_ssscalcjs( conj, x, y ) bli_sscalcjris( conj, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
#define bli_dsscalcjs( conj, x, y ) bli_sscalcjris( conj, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
#define bli_csscalcjs( conj, x, y ) bli_sscalcjris( conj, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
#define bli_zsscalcjs( conj, x, y ) bli_sscalcjris( conj, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )
#define bli_ssscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
#define bli_dsscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
#define bli_csscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
#define bli_zsscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )
// Destination y is real double precision: forward to bli_dscalcjris.
#define bli_sdscalcjs( conj, x, y ) bli_dscalcjris( conj, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
#define bli_ddscalcjs( conj, x, y ) bli_dscalcjris( conj, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_cdscalcjs( conj, x, y ) bli_dscalcjris( conj, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_zdscalcjs( conj, x, y ) bli_dscalcjris( conj, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_sdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
#define bli_ddscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_cdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_zdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )
// Complex destinations have two implementations, selected by
// BLIS_ENABLE_C99_COMPLEX. Without it, the real/imaginary parts are forwarded
// to a *scalcjris macro: a real x (s, d) goes to the mixed-domain
// bli_scscalcjris / bli_dzscalcjris, and a complex x (c, z) goes to
// bli_cscalcjris / bli_zscalcjris.
#ifndef BLIS_ENABLE_C99_COMPLEX
#define bli_scscalcjs( conj, x, y ) bli_scscalcjris( conj, bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) )
#define bli_dcscalcjs( conj, x, y ) bli_scscalcjris( conj, bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
#define bli_ccscalcjs( conj, x, y ) bli_cscalcjris( conj, bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
#define bli_zcscalcjs( conj, x, y ) bli_cscalcjris( conj, bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )
#define bli_scscalcjs( conjx, x, y ) bli_scscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) )
#define bli_dcscalcjs( conjx, x, y ) bli_scscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
#define bli_ccscalcjs( conjx, x, y ) bli_cscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
#define bli_zcscalcjs( conjx, x, y ) bli_cscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )
#define bli_szscalcjs( conj, x, y ) bli_dzscalcjris( conj, bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_dzscalcjs( conj, x, y ) bli_dzscalcjris( conj, bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_czscalcjs( conj, x, y ) bli_zscalcjris( conj, bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zzscalcjs( conj, x, y ) bli_zscalcjris( conj, bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_szscalcjs( conjx, x, y ) bli_dzscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_dzscalcjs( conjx, x, y ) bli_dzscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_czscalcjs( conjx, x, y ) bli_zscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zzscalcjs( conjx, x, y ) bli_zscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
#else // ifdef BLIS_ENABLE_C99_COMPLEX
// With C99 complex types the scaling is an in-place multiply (y *= x). A real
// x (s, d) ignores the flag; a complex x (c, z) is passed through
// conjf()/conj() when bli_is_conj() is true for the flag.
// NOTE(review): in the variants whose parameter is named `conj`, the token
// `conj` in `conj (x)` is the macro parameter, so it is replaced by the
// caller's flag argument instead of naming the <complex.h> function. The
// `conjx` spelling does not have this capture.
#define bli_scscalcjs( conj, x, y ) { (y) *= (x); }
#define bli_dcscalcjs( conj, x, y ) { (y) *= (x); }
#define bli_ccscalcjs( conj, x, y ) { (y) *= ( bli_is_conj( conj ) ? conjf(x) : (x) ); }
#define bli_zcscalcjs( conj, x, y ) { (y) *= ( bli_is_conj( conj ) ? conj (x) : (x) ); }
#define bli_scscalcjs( conjx, x, y ) { (y) *= (x); }
#define bli_dcscalcjs( conjx, x, y ) { (y) *= (x); }
#define bli_ccscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conjf(x) : (x) ); }
#define bli_zcscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conj (x) : (x) ); }
#define bli_szscalcjs( conj, x, y ) { (y) *= (x); }
#define bli_dzscalcjs( conj, x, y ) { (y) *= (x); }
#define bli_czscalcjs( conj, x, y ) { (y) *= ( bli_is_conj( conj ) ? conjf(x) : (x) ); }
#define bli_zzscalcjs( conj, x, y ) { (y) *= ( bli_is_conj( conj ) ? conj (x) : (x) ); }
#define bli_szscalcjs( conjx, x, y ) { (y) *= (x); }
#define bli_dzscalcjs( conjx, x, y ) { (y) *= (x); }
#define bli_czscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conjf(x) : (x) ); }
#define bli_zzscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conj (x) : (x) ); }
#endif // BLIS_ENABLE_C99_COMPLEX
// Same-type shorthands: bli_?scalcjs forwards to the two-letter macro whose
// x and y type characters are equal.
#define bli_sscalcjs( conj, x, y ) bli_ssscalcjs( conj, x, y )
#define bli_dscalcjs( conj, x, y ) bli_ddscalcjs( conj, x, y )
#define bli_cscalcjs( conj, x, y ) bli_ccscalcjs( conj, x, y )
#define bli_zscalcjs( conj, x, y ) bli_zzscalcjs( conj, x, y )
#define bli_sscalcjs( conjx, x, y ) bli_ssscalcjs( conjx, x, y )
#define bli_dscalcjs( conjx, x, y ) bli_ddscalcjs( conjx, x, y )
#define bli_cscalcjs( conjx, x, y ) bli_ccscalcjs( conjx, x, y )
#define bli_zscalcjs( conjx, x, y ) bli_zzscalcjs( conjx, x, y )
#endif

View File

@@ -0,0 +1,44 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined( BLIS_SET0S_H )
#define BLIS_SET0S_H

// set0s
//
// Each macro forwards (a) to the matching bli_?sets with 0 in both leading
// argument positions (presumably the real and imaginary parts -- confirm
// against the definition of bli_?sets). The s/c variants pass float literals,
// the d/z variants pass double literals.

#define bli_sset0s( a ) \
    bli_ssets( 0.0F, 0.0F, (a) )
#define bli_dset0s( a ) \
    bli_dsets( 0.0 , 0.0 , (a) )
#define bli_cset0s( a ) \
    bli_csets( 0.0F, 0.0F, (a) )
#define bli_zset0s( a ) \
    bli_zsets( 0.0 , 0.0 , (a) )

#endif // BLIS_SET0S_H

View File

@@ -0,0 +1,44 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined( BLIS_SET1S_H )
#define BLIS_SET1S_H

// set1s
//
// Each macro forwards (a) to the matching bli_?sets with the leading
// constants ( 1, 0 ) (presumably real part 1 and imaginary part 0 -- confirm
// against the definition of bli_?sets). The s/c variants pass float literals,
// the d/z variants pass double literals.

#define bli_sset1s( a ) \
    bli_ssets( 1.0F, 0.0F, (a) )
#define bli_dset1s( a ) \
    bli_dsets( 1.0 , 0.0 , (a) )
#define bli_cset1s( a ) \
    bli_csets( 1.0F, 0.0F, (a) )
#define bli_zset1s( a ) \
    bli_zsets( 1.0 , 0.0 , (a) )

#endif // BLIS_SET1S_H

View File

@@ -0,0 +1,44 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined( BLIS_SETI0S_H )
#define BLIS_SETI0S_H

// seti0s
//
// Each macro forwards (a) to the matching bli_?setis with 0 as the leading
// argument (presumably the new imaginary part -- confirm against the
// definition of bli_?setis). The s/c variants pass a float literal, the d/z
// variants pass a double literal.

#define bli_sseti0s( a ) \
    bli_ssetis( 0.0F, (a) )
#define bli_dseti0s( a ) \
    bli_dsetis( 0.0 , (a) )
#define bli_cseti0s( a ) \
    bli_csetis( 0.0F, (a) )
#define bli_zseti0s( a ) \
    bli_zsetis( 0.0 , (a) )

#endif // BLIS_SETI0S_H

View File

@@ -0,0 +1,81 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_SET0RIS_MXN_H
#define BLIS_SET0RIS_MXN_H

// set0ris_mxn
//
// Apply bli_?set0ris to every element (i, j), 0 <= i < m and 0 <= j < n, of a
// matrix whose real and imaginary parts are stored in separate arrays.
//
//   m, n        iteration bounds for i and j
//   ar, ai      base pointers to the real and imaginary parts
//   rs_a, cs_a  multipliers applied to i and j when forming the element
//               offset: element (i, j) lives at offset i*rs_a + j*cs_a
//
// Every argument is parenthesized in the expansion so that expression
// arguments (for example a stride written as `ld + 1`) keep their meaning.
// The loop counters are named _i/_j so that a caller argument named `i` or
// `j` is not captured by the macro's own locals.
//
// Each macro expands to a brace-enclosed block; invoke it as a statement.

#define bli_sset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \
{ \
    dim_t _i, _j; \
\
    for ( _j = 0; _j < (n); ++_j ) \
    for ( _i = 0; _i < (m); ++_i ) \
    bli_sset0ris( *((ar) + _i*(rs_a) + _j*(cs_a)), \
                  *((ai) + _i*(rs_a) + _j*(cs_a)) ); \
}

#define bli_dset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \
{ \
    dim_t _i, _j; \
\
    for ( _j = 0; _j < (n); ++_j ) \
    for ( _i = 0; _i < (m); ++_i ) \
    bli_dset0ris( *((ar) + _i*(rs_a) + _j*(cs_a)), \
                  *((ai) + _i*(rs_a) + _j*(cs_a)) ); \
}

#define bli_cset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \
{ \
    dim_t _i, _j; \
\
    for ( _j = 0; _j < (n); ++_j ) \
    for ( _i = 0; _i < (m); ++_i ) \
    bli_cset0ris( *((ar) + _i*(rs_a) + _j*(cs_a)), \
                  *((ai) + _i*(rs_a) + _j*(cs_a)) ); \
}

#define bli_zset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \
{ \
    dim_t _i, _j; \
\
    for ( _j = 0; _j < (n); ++_j ) \
    for ( _i = 0; _i < (m); ++_i ) \
    bli_zset0ris( *((ar) + _i*(rs_a) + _j*(cs_a)), \
                  *((ai) + _i*(rs_a) + _j*(cs_a)) ); \
}

#endif

View File

@@ -0,0 +1,63 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined( BLIS_ADD3RIS_H )
#define BLIS_ADD3RIS_H

// add3ris
//
// c := a + b, with every operand passed as separate real and imaginary parts.
//
// - Real variants (s, d) assign only cr = ar + br; ai, bi and ci are accepted
//   for a uniform argument list but are not used.
// - Complex variants (c, z) additionally assign ci = ai + bi.
//
// The statements contain no type-specific tokens, so each double-precision
// macro forwards to its single-precision counterpart.

#define bli_sadd3ris( ar, ai, br, bi, cr, ci ) \
{ \
    (cr) = (ar) + (br); \
}

#define bli_dadd3ris( ar, ai, br, bi, cr, ci ) \
    bli_sadd3ris( ar, ai, br, bi, cr, ci )

#define bli_cadd3ris( ar, ai, br, bi, cr, ci ) \
{ \
    (cr) = (ar) + (br); \
    (ci) = (ai) + (bi); \
}

#define bli_zadd3ris( ar, ai, br, bi, cr, ci ) \
    bli_cadd3ris( ar, ai, br, bi, cr, ci )

#endif // BLIS_ADD3RIS_H

View File

@@ -0,0 +1,110 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_SCALRIS_MXN_UPLO_H
#define BLIS_SCALRIS_MXN_UPLO_H

// scalris_mxn_u, scalris_mxn_l
//
// Apply bli_?scalris( *ar, *ai, x_r(i,j), x_i(i,j) ) to the elements of an
// m x n matrix x that lie on one side of a diagonal. The real and imaginary
// parts of x are stored in separate arrays.
//
//   diagoff     diagonal offset compared against j - i
//   m, n        iteration bounds for i and j
//   ar, ai      pointers to the real and imaginary parts of the scalar
//   xr, xi      base pointers to the real and imaginary parts of x
//   rs_x, cs_x  multipliers applied to i and j when forming the element
//               offset: element (i, j) lives at offset i*rs_x + j*cs_x
//
// The _u variants visit elements with j - i >= diagoff; the _l variants
// visit elements with j - i <= diagoff. Elements on the diagonal selected by
// diagoff are visited by both.
//
// Every argument is parenthesized in the expansion so that expression
// arguments (for example a stride written as `ld + 1`) keep their meaning.
// The loop counters are named _i/_j so that a caller argument named `i` or
// `j` is not captured by the macro's own locals.
//
// Each macro expands to a brace-enclosed block; invoke it as a statement.

// scalris_mxn_u

#define bli_cscalris_mxn_u( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \
{ \
    dim_t _i, _j; \
\
    for ( _j = 0; _j < (n); ++_j ) \
    for ( _i = 0; _i < (m); ++_i ) \
    { \
        if ( (doff_t)_j - (doff_t)_i >= (diagoff) ) \
        { \
            bli_cscalris( *(ar), \
                          *(ai), \
                          *((xr) + _i*(rs_x) + _j*(cs_x)), \
                          *((xi) + _i*(rs_x) + _j*(cs_x)) ); \
        } \
    } \
}

#define bli_zscalris_mxn_u( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \
{ \
    dim_t _i, _j; \
\
    for ( _j = 0; _j < (n); ++_j ) \
    for ( _i = 0; _i < (m); ++_i ) \
    { \
        if ( (doff_t)_j - (doff_t)_i >= (diagoff) ) \
        { \
            bli_zscalris( *(ar), \
                          *(ai), \
                          *((xr) + _i*(rs_x) + _j*(cs_x)), \
                          *((xi) + _i*(rs_x) + _j*(cs_x)) ); \
        } \
    } \
}

// scalris_mxn_l

#define bli_cscalris_mxn_l( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \
{ \
    dim_t _i, _j; \
\
    for ( _j = 0; _j < (n); ++_j ) \
    for ( _i = 0; _i < (m); ++_i ) \
    { \
        if ( (doff_t)_j - (doff_t)_i <= (diagoff) ) \
        { \
            bli_cscalris( *(ar), \
                          *(ai), \
                          *((xr) + _i*(rs_x) + _j*(cs_x)), \
                          *((xi) + _i*(rs_x) + _j*(cs_x)) ); \
        } \
    } \
}

#define bli_zscalris_mxn_l( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \
{ \
    dim_t _i, _j; \
\
    for ( _j = 0; _j < (n); ++_j ) \
    for ( _i = 0; _i < (m); ++_i ) \
    { \
        if ( (doff_t)_j - (doff_t)_i <= (diagoff) ) \
        { \
            bli_zscalris( *(ar), \
                          *(ai), \
                          *((xr) + _i*(rs_x) + _j*(cs_x)), \
                          *((xi) + _i*(rs_x) + _j*(cs_x)) ); \
        } \
    } \
}

#endif

View File

@@ -0,0 +1,46 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined( BLIS_SET0RIS_H )
#define BLIS_SET0RIS_H

// set0ris
//
// Each macro forwards the destination parts ( xr, xi ) to the matching
// bli_?copyris with the literal pair ( 0, 0 ) as the leading arguments.
// The s/c variants pass float literals, the d/z variants pass double
// literals.

#define bli_sset0ris( xr, xi ) \
    bli_scopyris( 0.0F, 0.0F, xr, xi )
#define bli_dset0ris( xr, xi ) \
    bli_dcopyris( 0.0 , 0.0 , xr, xi )
#define bli_cset0ris( xr, xi ) \
    bli_ccopyris( 0.0F, 0.0F, xr, xi )
#define bli_zset0ris( xr, xi ) \
    bli_zcopyris( 0.0 , 0.0 , xr, xi )

#endif // BLIS_SET0RIS_H

View File

@@ -0,0 +1,46 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined( BLIS_COPYJRI3S_H )
#define BLIS_COPYJRI3S_H

// copyjri3s
//
// Conjugating form of copyri3s: each macro forwards to the matching
// bli_?copyri3s with the imaginary source part negated, i.e. it passes
// ( ar, -ai ) as the source and ( br, bi, bri ) as the destinations.

#define bli_scopyjri3s( ar, ai, br, bi, bri ) \
    bli_scopyri3s( (ar), -(ai), (br), (bi), (bri) )
#define bli_dcopyjri3s( ar, ai, br, bi, bri ) \
    bli_dcopyri3s( (ar), -(ai), (br), (bi), (bri) )
#define bli_ccopyjri3s( ar, ai, br, bi, bri ) \
    bli_ccopyri3s( (ar), -(ai), (br), (bi), (bri) )
#define bli_zcopyjri3s( ar, ai, br, bi, bri ) \
    bli_zcopyri3s( (ar), -(ai), (br), (bi), (bri) )

#endif // BLIS_COPYJRI3S_H

View File

@@ -0,0 +1,65 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined( BLIS_COPYRI3S_H )
#define BLIS_COPYRI3S_H

// copyri3s
//
// Copy a scalar given as ( ar, ai ) into three destinations:
//   br  := ar
//   bi  := ai
//   bri := ar + ai
//
// - Real variants (s, d) assign only br; ai, bi and bri are accepted for a
//   uniform argument list but are not used.
// - Complex variants (c, z) assign all three destinations.
//
// The statements contain no type-specific tokens, so each double-precision
// macro forwards to its single-precision counterpart.

#define bli_scopyri3s( ar, ai, br, bi, bri ) \
{ \
    (br) = (ar); \
}

#define bli_dcopyri3s( ar, ai, br, bi, bri ) \
    bli_scopyri3s( ar, ai, br, bi, bri )

#define bli_ccopyri3s( ar, ai, br, bi, bri ) \
{ \
    (br)  = (ar); \
    (bi)  = (ai); \
    (bri) = (ar) + (ai); \
}

#define bli_zcopyri3s( ar, ai, br, bi, bri ) \
    bli_ccopyri3s( ar, ai, br, bi, bri )

#endif // BLIS_COPYRI3S_H

View File

@@ -0,0 +1,79 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined( BLIS_SCAL2JRI3S_H )
#define BLIS_SCAL2JRI3S_H

// scal2jri3s
//
// y := a * conj(x), with every operand passed as separate real and imaginary
// parts, plus a third output yri := yr + yi (computed from the values just
// stored in yr and yi).
//
// - Real variants (s, d) assign only yr = ar * xr; the remaining arguments
//   are accepted for a uniform argument list but are not used.
// - Complex variants (c, z):
//     yr = ar*xr + ai*xi
//     yi = ai*xr - ar*xi
// - Mixed variants (sc, dz) treat a as real (ai is not used):
//     yr = ar *  xr
//     yi = ar * -xi
//
// The statements contain no type-specific tokens, so each double-precision
// macro forwards to its single-precision counterpart.

#define bli_sscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
    (yr) = (ar) * (xr); \
}

#define bli_dscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
    bli_sscal2jri3s( ar, ai, xr, xi, yr, yi, yri )

#define bli_cscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
    (yr)  = (ar) * (xr) + (ai) * (xi); \
    (yi)  = (ai) * (xr) - (ar) * (xi); \
    (yri) = (yr) + (yi); \
}

#define bli_zscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
    bli_cscal2jri3s( ar, ai, xr, xi, yr, yi, yri )

#define bli_scscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
    (yr)  = (ar) * (xr); \
    (yi)  = (ar) * -(xi); \
    (yri) = (yr) + (yi); \
}

#define bli_dzscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
    bli_scscal2jri3s( ar, ai, xr, xi, yr, yi, yri )

#endif // BLIS_SCAL2JRI3S_H

View File

@@ -0,0 +1,79 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined( BLIS_SCAL2RI3S_H )
#define BLIS_SCAL2RI3S_H

// scal2ri3s
//
// y := a * x, with every operand passed as separate real and imaginary
// parts, plus a third output yri := yr + yi (computed from the values just
// stored in yr and yi).
//
// - Real variants (s, d) assign only yr = ar * xr; the remaining arguments
//   are accepted for a uniform argument list but are not used.
// - Complex variants (c, z):
//     yr = ar*xr - ai*xi
//     yi = ai*xr + ar*xi
// - Mixed variants (sc, dz) treat a as real (ai is not used):
//     yr = ar * xr
//     yi = ar * xi
//
// The statements contain no type-specific tokens, so each double-precision
// macro forwards to its single-precision counterpart.

#define bli_sscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
    (yr) = (ar) * (xr); \
}

#define bli_dscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
    bli_sscal2ri3s( ar, ai, xr, xi, yr, yi, yri )

#define bli_cscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
    (yr)  = (ar) * (xr) - (ai) * (xi); \
    (yi)  = (ai) * (xr) + (ar) * (xi); \
    (yri) = (yr) + (yi); \
}

#define bli_zscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
    bli_cscal2ri3s( ar, ai, xr, xi, yr, yi, yri )

#define bli_scscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
    (yr)  = (ar) * (xr); \
    (yi)  = (ar) * (xi); \
    (yri) = (yr) + (yi); \
}

#define bli_dzscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
    bli_scscal2ri3s( ar, ai, xr, xi, yr, yi, yri )

#endif // BLIS_SCAL2RI3S_H