mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Merge branch 'master' of https://github.com/tlrmchlsmth/blis
This commit is contained in:
@@ -50,6 +50,12 @@
|
||||
// Store a_p in the a_next field of auxinfo. auxinfo is accessed with `.`,
// so pass the object itself (or a dereferenced pointer), not a pointer.
// The expansion is a bare braced block, not do { } while (0): invoke it as
// a statement of its own, since `if (c) M(...); else ...` will not parse.
#define bli_auxinfo_set_next_a( a_p, auxinfo ) \
{ \
	(auxinfo).a_next = (a_p); \
}
|
||||
// Store b_p in the b_next field of auxinfo. auxinfo is accessed with `.`,
// so pass the object itself (or a dereferenced pointer), not a pointer.
// The expansion is a bare braced block, not do { } while (0): invoke it as
// a statement of its own, since `if (c) M(...); else ...` will not parse.
#define bli_auxinfo_set_next_b( b_p, auxinfo ) \
{ \
	(auxinfo).b_next = (b_p); \
}
|
||||
|
||||
// Convenience form that updates both fields at once: expands
// bli_auxinfo_set_next_a( a_p, auxinfo ) and then
// bli_auxinfo_set_next_b( b_p, auxinfo ) inside one braced block, so
// a_next receives a_p and b_next receives b_p. Like the single-field
// setters, this is a bare { } block and should be used as a standalone
// statement.
#define bli_auxinfo_set_next_ab( a_p, b_p, auxinfo ) \
|
||||
{ \
|
||||
bli_auxinfo_set_next_a( a_p, auxinfo ); \
|
||||
bli_auxinfo_set_next_b( b_p, auxinfo ); \
|
||||
}
|
||||
|
||||
// Store a_p in the ps_a field of auxinfo (accessed with `.`, so auxinfo is
// the object, not a pointer to it). Despite the parameter's name, the value
// is whatever ps_a holds -- presumably a stride for A; confirm against the
// auxinfo struct definition. Expands to a bare braced block; use it as a
// standalone statement.
#define bli_auxinfo_set_ps_a( a_p, auxinfo ) \
{ \
	(auxinfo).ps_a = (a_p); \
}
|
||||
// Store b_p in the ps_b field of auxinfo (accessed with `.`, so auxinfo is
// the object, not a pointer to it). Despite the parameter's name, the value
// is whatever ps_b holds -- presumably a stride for B; confirm against the
// auxinfo struct definition. Expands to a bare braced block; use it as a
// standalone statement.
#define bli_auxinfo_set_ps_b( b_p, auxinfo ) \
{ \
	(auxinfo).ps_b = (b_p); \
}
|
||||
|
||||
|
||||
@@ -45,16 +45,7 @@
|
||||
// Always yields the double constant 0.0; the argument x is ignored and is
// never evaluated.
#define bli_dimag( x ) ( 0.0 )
|
||||
|
||||
|
||||
#ifdef BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
|
||||
#define bli_creal( x ) ( crealf(x) )
|
||||
#define bli_cimag( x ) ( cimagf(x) )
|
||||
#define bli_zreal( x ) ( creal(x) )
|
||||
#define bli_zimag( x ) ( cimag(x) )
|
||||
|
||||
|
||||
#else // ifndef BLIS_ENABLE_C99_COMPLEX
|
||||
#ifndef BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
|
||||
// Real part of x, read directly from its `real` member. This is the variant
// compiled when BLIS_ENABLE_C99_COMPLEX is not defined; x must be an object
// (not a pointer) with a `real` field.
#define bli_creal( x ) ( (x).real )
|
||||
@@ -63,6 +54,15 @@
|
||||
// Imaginary part of x, read directly from its `imag` member; x must be an
// object (not a pointer) with an `imag` field.
#define bli_zimag( x ) ( (x).imag )
|
||||
|
||||
|
||||
#else // ifdef BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
|
||||
// C99 variants, compiled when BLIS_ENABLE_C99_COMPLEX is defined: the real
// and imaginary parts come from the <complex.h> accessor functions. The c
// macros call the float versions (crealf/cimagf); the z macros call the
// double versions (creal/cimag).
#define bli_creal( x ) ( crealf(x) )
|
||||
#define bli_cimag( x ) ( cimagf(x) )
|
||||
#define bli_zreal( x ) ( creal(x) )
|
||||
#define bli_zimag( x ) ( cimag(x) )
|
||||
|
||||
|
||||
#endif // BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
|
||||
|
||||
@@ -194,6 +194,33 @@ GENTFUNCR( scomplex, float, c, s, tfuncname, varname ) \
|
||||
GENTFUNCR( dcomplex, double, z, d, tfuncname, varname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with complex domain only and real projection (with no auxiliary arguments) --
|
||||
|
||||
|
||||
#define INSERT_GENTFUNCCO_BASIC0( tfuncname ) \
|
||||
\
|
||||
GENTFUNCCO( scomplex, float, c, s, tfuncname ) \
|
||||
GENTFUNCCO( dcomplex, double, z, d, tfuncname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with complex domain only and real projection (with one auxiliary argument) --
|
||||
|
||||
|
||||
#define INSERT_GENTFUNCCO_BASIC( tfuncname, varname ) \
|
||||
\
|
||||
GENTFUNCCO( scomplex, float, c, s, tfuncname, varname ) \
|
||||
GENTFUNCCO( dcomplex, double, z, d, tfuncname, varname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with complex domain only and real projection (with two auxiliary arguments) --
|
||||
|
||||
|
||||
#define INSERT_GENTFUNCCO_BASIC2( tfuncname, varname1, varname2 ) \
|
||||
\
|
||||
GENTFUNCCO( scomplex, float, c, s, tfuncname, varname1, varname2 ) \
|
||||
GENTFUNCCO( dcomplex, double, z, d, tfuncname, varname1, varname2 )
|
||||
|
||||
|
||||
// -- Basic one-operand with real and integer projections --
|
||||
|
||||
|
||||
|
||||
@@ -171,6 +171,15 @@ GENTPROTR( scomplex, float, c, s, funcname ) \
|
||||
GENTPROTR( dcomplex, double, z, d, funcname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with complex domain only and real projection --
|
||||
|
||||
|
||||
#define INSERT_GENTPROTCO_BASIC( funcname ) \
|
||||
\
|
||||
GENTPROTCO( scomplex, float, c, s, funcname ) \
|
||||
GENTPROTCO( dcomplex, double, z, d, funcname )
|
||||
|
||||
|
||||
// -- Basic one-operand with real and integer projections --
|
||||
|
||||
|
||||
|
||||
413
frame/include/bli_kernel_3m_macro_defs.h
Normal file
413
frame/include/bli_kernel_3m_macro_defs.h
Normal file
@@ -0,0 +1,413 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_KERNEL_3M_MACRO_DEFS_H
|
||||
#define BLIS_KERNEL_3M_MACRO_DEFS_H
|
||||
|
||||
|
||||
// -- Define default 3m-specific kernel names ----------------------------------
|
||||
|
||||
//
|
||||
// Level-3 3m
|
||||
//
|
||||
|
||||
// gemm3m micro-kernels
|
||||
|
||||
#ifndef BLIS_CGEMM3M_UKERNEL
|
||||
#define BLIS_CGEMM3M_UKERNEL BLIS_CGEMM3M_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZGEMM3M_UKERNEL
|
||||
#define BLIS_ZGEMM3M_UKERNEL BLIS_ZGEMM3M_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
// gemmtrsm3m_l micro-kernels
|
||||
|
||||
#ifndef BLIS_CGEMMTRSM3M_L_UKERNEL
|
||||
#define BLIS_CGEMMTRSM3M_L_UKERNEL BLIS_CGEMMTRSM3M_L_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZGEMMTRSM3M_L_UKERNEL
|
||||
#define BLIS_ZGEMMTRSM3M_L_UKERNEL BLIS_ZGEMMTRSM3M_L_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
// gemmtrsm3m_u micro-kernels
|
||||
|
||||
#ifndef BLIS_CGEMMTRSM3M_U_UKERNEL
|
||||
#define BLIS_CGEMMTRSM3M_U_UKERNEL BLIS_CGEMMTRSM3M_U_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZGEMMTRSM3M_U_UKERNEL
|
||||
#define BLIS_ZGEMMTRSM3M_U_UKERNEL BLIS_ZGEMMTRSM3M_U_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
// trsm3m_l micro-kernels
|
||||
|
||||
#ifndef BLIS_CTRSM3M_L_UKERNEL
|
||||
#define BLIS_CTRSM3M_L_UKERNEL BLIS_CTRSM3M_L_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZTRSM3M_L_UKERNEL
|
||||
#define BLIS_ZTRSM3M_L_UKERNEL BLIS_ZTRSM3M_L_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
// trsm3m_u micro-kernels
|
||||
|
||||
#ifndef BLIS_CTRSM3M_U_UKERNEL
|
||||
#define BLIS_CTRSM3M_U_UKERNEL BLIS_CTRSM3M_U_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZTRSM3M_U_UKERNEL
|
||||
#define BLIS_ZTRSM3M_U_UKERNEL BLIS_ZTRSM3M_U_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
//
|
||||
// Level-1m
|
||||
//
|
||||
|
||||
// packm_2xk_ri3 kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_2XK_RI3_KERNEL
|
||||
#define BLIS_SPACKM_2XK_RI3_KERNEL BLIS_SPACKM_2XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_2XK_RI3_KERNEL
|
||||
#define BLIS_DPACKM_2XK_RI3_KERNEL BLIS_DPACKM_2XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_2XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_2XK_RI3_KERNEL BLIS_CPACKM_2XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_2XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_2XK_RI3_KERNEL BLIS_ZPACKM_2XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_4xk_ri3 kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_4XK_RI3_KERNEL
|
||||
#define BLIS_SPACKM_4XK_RI3_KERNEL BLIS_SPACKM_4XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_4XK_RI3_KERNEL
|
||||
#define BLIS_DPACKM_4XK_RI3_KERNEL BLIS_DPACKM_4XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_4XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_4XK_RI3_KERNEL BLIS_CPACKM_4XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_4XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_4XK_RI3_KERNEL BLIS_ZPACKM_4XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_6xk_ri3 kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_6XK_RI3_KERNEL
|
||||
#define BLIS_SPACKM_6XK_RI3_KERNEL BLIS_SPACKM_6XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_6XK_RI3_KERNEL
|
||||
#define BLIS_DPACKM_6XK_RI3_KERNEL BLIS_DPACKM_6XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_6XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_6XK_RI3_KERNEL BLIS_CPACKM_6XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_6XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_6XK_RI3_KERNEL BLIS_ZPACKM_6XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_8xk_ri3 kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_8XK_RI3_KERNEL
|
||||
#define BLIS_SPACKM_8XK_RI3_KERNEL BLIS_SPACKM_8XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_8XK_RI3_KERNEL
|
||||
#define BLIS_DPACKM_8XK_RI3_KERNEL BLIS_DPACKM_8XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_8XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_8XK_RI3_KERNEL BLIS_CPACKM_8XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_8XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_8XK_RI3_KERNEL BLIS_ZPACKM_8XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_10xk_ri3 kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_10XK_RI3_KERNEL
|
||||
#define BLIS_SPACKM_10XK_RI3_KERNEL BLIS_SPACKM_10XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_10XK_RI3_KERNEL
|
||||
#define BLIS_DPACKM_10XK_RI3_KERNEL BLIS_DPACKM_10XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_10XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_10XK_RI3_KERNEL BLIS_CPACKM_10XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_10XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_10XK_RI3_KERNEL BLIS_ZPACKM_10XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_12xk_ri3 kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_12XK_RI3_KERNEL
|
||||
#define BLIS_SPACKM_12XK_RI3_KERNEL BLIS_SPACKM_12XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_12XK_RI3_KERNEL
|
||||
#define BLIS_DPACKM_12XK_RI3_KERNEL BLIS_DPACKM_12XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_12XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_12XK_RI3_KERNEL BLIS_CPACKM_12XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_12XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_12XK_RI3_KERNEL BLIS_ZPACKM_12XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_14xk_ri3 kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_14XK_RI3_KERNEL
|
||||
#define BLIS_SPACKM_14XK_RI3_KERNEL BLIS_SPACKM_14XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_14XK_RI3_KERNEL
|
||||
#define BLIS_DPACKM_14XK_RI3_KERNEL BLIS_DPACKM_14XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_14XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_14XK_RI3_KERNEL BLIS_CPACKM_14XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_14XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_14XK_RI3_KERNEL BLIS_ZPACKM_14XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_16xk_ri3 kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_16XK_RI3_KERNEL
|
||||
#define BLIS_SPACKM_16XK_RI3_KERNEL BLIS_SPACKM_16XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_16XK_RI3_KERNEL
|
||||
#define BLIS_DPACKM_16XK_RI3_KERNEL BLIS_DPACKM_16XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_16XK_RI3_KERNEL
|
||||
#define BLIS_CPACKM_16XK_RI3_KERNEL BLIS_CPACKM_16XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_16XK_RI3_KERNEL
|
||||
#define BLIS_ZPACKM_16XK_RI3_KERNEL BLIS_ZPACKM_16XK_RI3_KERNEL_REF
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// -- Define default 3m-specific blocksize macros ------------------------------
|
||||
|
||||
// Define complex 3m register blocksizes in terms of blocksizes used for
|
||||
// real kernels.
|
||||
|
||||
// 3m register blocksizes
|
||||
#define BLIS_DEFAULT_3M_MR_C BLIS_DEFAULT_MR_S
|
||||
#define BLIS_DEFAULT_3M_KR_C BLIS_DEFAULT_KR_S
|
||||
#define BLIS_DEFAULT_3M_NR_C BLIS_DEFAULT_NR_S
|
||||
|
||||
#define BLIS_DEFAULT_3M_MR_Z BLIS_DEFAULT_MR_D
|
||||
#define BLIS_DEFAULT_3M_KR_Z BLIS_DEFAULT_KR_D
|
||||
#define BLIS_DEFAULT_3M_NR_Z BLIS_DEFAULT_NR_D
|
||||
|
||||
// 3m register blocksize extensions
|
||||
#define BLIS_EXTEND_3M_MR_C BLIS_EXTEND_MR_S
|
||||
#define BLIS_EXTEND_3M_KR_C 0
|
||||
#define BLIS_EXTEND_3M_NR_C BLIS_EXTEND_NR_S
|
||||
|
||||
#define BLIS_EXTEND_3M_MR_Z BLIS_EXTEND_MR_D
|
||||
#define BLIS_EXTEND_3M_KR_Z 0
|
||||
#define BLIS_EXTEND_3M_NR_Z BLIS_EXTEND_NR_D
|
||||
|
||||
// Define complex 3m cache blocksizes in terms of blocksizes used for
|
||||
// real operations (if they have not yet already been defined).
|
||||
|
||||
// 3m cache blocksizes
|
||||
#ifndef BLIS_DEFAULT_3M_MC_C
|
||||
#define BLIS_DEFAULT_3M_MC_C ((BLIS_DEFAULT_MC_S)/1)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_3M_KC_C
|
||||
#define BLIS_DEFAULT_3M_KC_C ((BLIS_DEFAULT_KC_S)/2)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_3M_NC_C
|
||||
#define BLIS_DEFAULT_3M_NC_C ((BLIS_DEFAULT_NC_S)/1)
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DEFAULT_3M_MC_Z
|
||||
#define BLIS_DEFAULT_3M_MC_Z ((BLIS_DEFAULT_MC_D)/1)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_3M_KC_Z
|
||||
#define BLIS_DEFAULT_3M_KC_Z ((BLIS_DEFAULT_KC_D)/2)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_3M_NC_Z
|
||||
#define BLIS_DEFAULT_3M_NC_Z ((BLIS_DEFAULT_NC_D)/1)
|
||||
#endif
|
||||
|
||||
// 3m cache blocksize extensions
|
||||
#ifndef BLIS_EXTEND_3M_MC_C
|
||||
#define BLIS_EXTEND_3M_MC_C 0
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_3M_KC_C
|
||||
#define BLIS_EXTEND_3M_KC_C 0
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_3M_NC_C
|
||||
#define BLIS_EXTEND_3M_NC_C 0
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_EXTEND_3M_MC_Z
|
||||
#define BLIS_EXTEND_3M_MC_Z 0
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_3M_KC_Z
|
||||
#define BLIS_EXTEND_3M_KC_Z 0
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_3M_NC_Z
|
||||
#define BLIS_EXTEND_3M_NC_Z 0
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// -- Kernel blocksize checks --------------------------------------------------
|
||||
|
||||
// Verify that cache blocksizes are whole multiples of register blocksizes.
|
||||
// Specifically, verify that:
|
||||
// - MC is a whole multiple of MR *AND* NR.
|
||||
// - NC is a whole multiple of NR *AND* MR.
|
||||
// - KC is a whole multiple of KR *AND* of both MR and NR.
|
||||
// These constraints are enforced because it makes it easier to handle diagonals
|
||||
// in the macro-kernel implementations.
|
||||
|
||||
//
|
||||
// MC must be a whole multiple of MR and NR.
|
||||
//
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_3M_MC_C % BLIS_DEFAULT_3M_MR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_3M_MC_Z % BLIS_DEFAULT_3M_MR_Z != 0 ) \
|
||||
)
|
||||
#error "MC (3m) must be multiple of MR for all datatypes."
|
||||
#endif
|
||||
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_3M_MC_C % BLIS_DEFAULT_3M_NR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_3M_MC_Z % BLIS_DEFAULT_3M_NR_Z != 0 ) \
|
||||
)
|
||||
#error "MC (3m) must be multiple of NR for all datatypes."
|
||||
#endif
|
||||
|
||||
//
|
||||
// NC must be a whole multiple of NR and MR.
|
||||
//
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_3M_NC_C % BLIS_DEFAULT_3M_NR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_3M_NC_Z % BLIS_DEFAULT_3M_NR_Z != 0 ) \
|
||||
)
|
||||
#error "NC (3m) must be multiple of NR for all datatypes."
|
||||
#endif
|
||||
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_3M_NC_C % BLIS_DEFAULT_3M_MR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_3M_NC_Z % BLIS_DEFAULT_3M_MR_Z != 0 ) \
|
||||
)
|
||||
#error "NC (3m) must be multiple of MR for all datatypes."
|
||||
#endif
|
||||
|
||||
//
|
||||
// KC must be a whole multiple of KR, MR, and NR.
|
||||
//
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_3M_KC_C % BLIS_DEFAULT_3M_KR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_3M_KC_Z % BLIS_DEFAULT_3M_KR_Z != 0 ) \
|
||||
)
|
||||
#error "KC (3m) must be multiple of KR for all datatypes."
|
||||
#endif
|
||||
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_3M_KC_C % BLIS_DEFAULT_3M_MR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_3M_KC_Z % BLIS_DEFAULT_3M_MR_Z != 0 ) \
|
||||
)
|
||||
#error "KC (3m) must be multiple of MR for all datatypes."
|
||||
#endif
|
||||
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_3M_KC_C % BLIS_DEFAULT_3M_NR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_3M_KC_Z % BLIS_DEFAULT_3M_NR_Z != 0 ) \
|
||||
)
|
||||
#error "KC (3m) must be multiple of NR for all datatypes."
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// -- Compute extended blocksizes ----------------------------------------------
|
||||
|
||||
//
|
||||
// Compute maximum cache blocksizes.
|
||||
//
|
||||
|
||||
#define BLIS_MAXIMUM_3M_MC_C ( BLIS_DEFAULT_3M_MC_C + BLIS_EXTEND_3M_MC_C )
|
||||
#define BLIS_MAXIMUM_3M_KC_C ( BLIS_DEFAULT_3M_KC_C + BLIS_EXTEND_3M_KC_C )
|
||||
#define BLIS_MAXIMUM_3M_NC_C ( BLIS_DEFAULT_3M_NC_C + BLIS_EXTEND_3M_NC_C )
|
||||
|
||||
#define BLIS_MAXIMUM_3M_MC_Z ( BLIS_DEFAULT_3M_MC_Z + BLIS_EXTEND_3M_MC_Z )
|
||||
#define BLIS_MAXIMUM_3M_KC_Z ( BLIS_DEFAULT_3M_KC_Z + BLIS_EXTEND_3M_KC_Z )
|
||||
#define BLIS_MAXIMUM_3M_NC_Z ( BLIS_DEFAULT_3M_NC_Z + BLIS_EXTEND_3M_NC_Z )
|
||||
|
||||
//
|
||||
// Compute leading dimension blocksizes used when packing micro-panels.
|
||||
//
|
||||
|
||||
#define BLIS_PACKDIM_3M_MR_C ( BLIS_DEFAULT_3M_MR_C + BLIS_EXTEND_3M_MR_C )
|
||||
#define BLIS_PACKDIM_3M_KR_C ( BLIS_DEFAULT_3M_KR_C + BLIS_EXTEND_3M_KR_C )
|
||||
#define BLIS_PACKDIM_3M_NR_C ( BLIS_DEFAULT_3M_NR_C + BLIS_EXTEND_3M_NR_C )
|
||||
|
||||
#define BLIS_PACKDIM_3M_MR_Z ( BLIS_DEFAULT_3M_MR_Z + BLIS_EXTEND_3M_MR_Z )
|
||||
#define BLIS_PACKDIM_3M_KR_Z ( BLIS_DEFAULT_3M_KR_Z + BLIS_EXTEND_3M_KR_Z )
|
||||
#define BLIS_PACKDIM_3M_NR_Z ( BLIS_DEFAULT_3M_NR_Z + BLIS_EXTEND_3M_NR_Z )
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
416
frame/include/bli_kernel_4m_macro_defs.h
Normal file
416
frame/include/bli_kernel_4m_macro_defs.h
Normal file
@@ -0,0 +1,416 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_KERNEL_4M_MACRO_DEFS_H
|
||||
#define BLIS_KERNEL_4M_MACRO_DEFS_H
|
||||
|
||||
|
||||
// -- Define default 4m-specific kernel names ----------------------------------
|
||||
|
||||
//
|
||||
// Level-3 4m
|
||||
//
|
||||
|
||||
// gemm4m micro-kernels
|
||||
|
||||
#ifndef BLIS_CGEMM4M_UKERNEL
|
||||
#define BLIS_CGEMM4M_UKERNEL BLIS_CGEMM4M_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZGEMM4M_UKERNEL
|
||||
#define BLIS_ZGEMM4M_UKERNEL BLIS_ZGEMM4M_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
// gemmtrsm4m_l micro-kernels
|
||||
|
||||
#ifndef BLIS_CGEMMTRSM4M_L_UKERNEL
|
||||
#define BLIS_CGEMMTRSM4M_L_UKERNEL BLIS_CGEMMTRSM4M_L_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZGEMMTRSM4M_L_UKERNEL
|
||||
#define BLIS_ZGEMMTRSM4M_L_UKERNEL BLIS_ZGEMMTRSM4M_L_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
// gemmtrsm4m_u micro-kernels
|
||||
|
||||
#ifndef BLIS_CGEMMTRSM4M_U_UKERNEL
|
||||
#define BLIS_CGEMMTRSM4M_U_UKERNEL BLIS_CGEMMTRSM4M_U_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZGEMMTRSM4M_U_UKERNEL
|
||||
#define BLIS_ZGEMMTRSM4M_U_UKERNEL BLIS_ZGEMMTRSM4M_U_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
// trsm4m_l micro-kernels
|
||||
|
||||
#ifndef BLIS_CTRSM4M_L_UKERNEL
|
||||
#define BLIS_CTRSM4M_L_UKERNEL BLIS_CTRSM4M_L_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZTRSM4M_L_UKERNEL
|
||||
#define BLIS_ZTRSM4M_L_UKERNEL BLIS_ZTRSM4M_L_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
// trsm4m_u micro-kernels
|
||||
|
||||
#ifndef BLIS_CTRSM4M_U_UKERNEL
|
||||
#define BLIS_CTRSM4M_U_UKERNEL BLIS_CTRSM4M_U_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZTRSM4M_U_UKERNEL
|
||||
#define BLIS_ZTRSM4M_U_UKERNEL BLIS_ZTRSM4M_U_UKERNEL_REF
|
||||
#endif
|
||||
|
||||
//
|
||||
// Level-1m
|
||||
//
|
||||
|
||||
// packm_2xk_ri kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_2XK_RI_KERNEL
|
||||
#define BLIS_SPACKM_2XK_RI_KERNEL BLIS_SPACKM_2XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_2XK_RI_KERNEL
|
||||
#define BLIS_DPACKM_2XK_RI_KERNEL BLIS_DPACKM_2XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_2XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_2XK_RI_KERNEL BLIS_CPACKM_2XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_2XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_2XK_RI_KERNEL BLIS_ZPACKM_2XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_4xk_ri kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_4XK_RI_KERNEL
|
||||
#define BLIS_SPACKM_4XK_RI_KERNEL BLIS_SPACKM_4XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_4XK_RI_KERNEL
|
||||
#define BLIS_DPACKM_4XK_RI_KERNEL BLIS_DPACKM_4XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_4XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_4XK_RI_KERNEL BLIS_CPACKM_4XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_4XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_4XK_RI_KERNEL BLIS_ZPACKM_4XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_6xk_ri kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_6XK_RI_KERNEL
|
||||
#define BLIS_SPACKM_6XK_RI_KERNEL BLIS_SPACKM_6XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_6XK_RI_KERNEL
|
||||
#define BLIS_DPACKM_6XK_RI_KERNEL BLIS_DPACKM_6XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_6XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_6XK_RI_KERNEL BLIS_CPACKM_6XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_6XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_6XK_RI_KERNEL BLIS_ZPACKM_6XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_8xk_ri kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_8XK_RI_KERNEL
|
||||
#define BLIS_SPACKM_8XK_RI_KERNEL BLIS_SPACKM_8XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_8XK_RI_KERNEL
|
||||
#define BLIS_DPACKM_8XK_RI_KERNEL BLIS_DPACKM_8XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_8XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_8XK_RI_KERNEL BLIS_CPACKM_8XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_8XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_8XK_RI_KERNEL BLIS_ZPACKM_8XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_10xk_ri kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_10XK_RI_KERNEL
|
||||
#define BLIS_SPACKM_10XK_RI_KERNEL BLIS_SPACKM_10XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_10XK_RI_KERNEL
|
||||
#define BLIS_DPACKM_10XK_RI_KERNEL BLIS_DPACKM_10XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_10XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_10XK_RI_KERNEL BLIS_CPACKM_10XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_10XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_10XK_RI_KERNEL BLIS_ZPACKM_10XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_12xk_ri kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_12XK_RI_KERNEL
|
||||
#define BLIS_SPACKM_12XK_RI_KERNEL BLIS_SPACKM_12XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_12XK_RI_KERNEL
|
||||
#define BLIS_DPACKM_12XK_RI_KERNEL BLIS_DPACKM_12XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_12XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_12XK_RI_KERNEL BLIS_CPACKM_12XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_12XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_12XK_RI_KERNEL BLIS_ZPACKM_12XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_14xk_ri kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_14XK_RI_KERNEL
|
||||
#define BLIS_SPACKM_14XK_RI_KERNEL BLIS_SPACKM_14XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_14XK_RI_KERNEL
|
||||
#define BLIS_DPACKM_14XK_RI_KERNEL BLIS_DPACKM_14XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_14XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_14XK_RI_KERNEL BLIS_CPACKM_14XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_14XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_14XK_RI_KERNEL BLIS_ZPACKM_14XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
// packm_16xk_ri kernels
|
||||
|
||||
#ifndef BLIS_SPACKM_16XK_RI_KERNEL
|
||||
#define BLIS_SPACKM_16XK_RI_KERNEL BLIS_SPACKM_16XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DPACKM_16XK_RI_KERNEL
|
||||
#define BLIS_DPACKM_16XK_RI_KERNEL BLIS_DPACKM_16XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_CPACKM_16XK_RI_KERNEL
|
||||
#define BLIS_CPACKM_16XK_RI_KERNEL BLIS_CPACKM_16XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ZPACKM_16XK_RI_KERNEL
|
||||
#define BLIS_ZPACKM_16XK_RI_KERNEL BLIS_ZPACKM_16XK_RI_KERNEL_REF
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// -- Define default 4m-specific blocksize macros ------------------------------
|
||||
|
||||
// Define complex 4m register blocksizes in terms of blocksizes used for
|
||||
// real kernels.
|
||||
|
||||
// 4m register blocksizes
|
||||
#define BLIS_DEFAULT_4M_MR_C BLIS_DEFAULT_MR_S
|
||||
#define BLIS_DEFAULT_4M_KR_C BLIS_DEFAULT_KR_S
|
||||
#define BLIS_DEFAULT_4M_NR_C BLIS_DEFAULT_NR_S
|
||||
|
||||
#define BLIS_DEFAULT_4M_MR_Z BLIS_DEFAULT_MR_D
|
||||
#define BLIS_DEFAULT_4M_KR_Z BLIS_DEFAULT_KR_D
|
||||
#define BLIS_DEFAULT_4M_NR_Z BLIS_DEFAULT_NR_D
|
||||
|
||||
// 4m register blocksize extensions
|
||||
#define BLIS_EXTEND_4M_MR_C BLIS_EXTEND_MR_S
|
||||
#define BLIS_EXTEND_4M_KR_C 0
|
||||
#define BLIS_EXTEND_4M_NR_C BLIS_EXTEND_NR_S
|
||||
|
||||
#define BLIS_EXTEND_4M_MR_Z BLIS_EXTEND_MR_D
|
||||
#define BLIS_EXTEND_4M_KR_Z 0
|
||||
#define BLIS_EXTEND_4M_NR_Z BLIS_EXTEND_NR_D
|
||||
|
||||
// Define complex 4m cache blocksizes in terms of blocksizes used for
|
||||
// real operations (if they have not yet already been defined).
|
||||
|
||||
// 4m cache blocksizes
|
||||
#ifndef BLIS_DEFAULT_4M_MC_C
|
||||
#define BLIS_DEFAULT_4M_MC_C ((BLIS_DEFAULT_MC_S)/1)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_4M_KC_C
|
||||
#define BLIS_DEFAULT_4M_KC_C ((BLIS_DEFAULT_KC_S)/2)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_4M_NC_C
|
||||
#define BLIS_DEFAULT_4M_NC_C ((BLIS_DEFAULT_NC_S)/1)
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DEFAULT_4M_MC_Z
|
||||
#define BLIS_DEFAULT_4M_MC_Z ((BLIS_DEFAULT_MC_D)/1)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_4M_KC_Z
|
||||
#define BLIS_DEFAULT_4M_KC_Z ((BLIS_DEFAULT_KC_D)/2)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_4M_NC_Z
|
||||
#define BLIS_DEFAULT_4M_NC_Z ((BLIS_DEFAULT_NC_D)/1)
|
||||
#endif
|
||||
|
||||
// 4m cache blocksize extensions
|
||||
#ifndef BLIS_EXTEND_4M_MC_C
|
||||
#define BLIS_EXTEND_4M_MC_C 0
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_4M_KC_C
|
||||
#define BLIS_EXTEND_4M_KC_C 0
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_4M_NC_C
|
||||
#define BLIS_EXTEND_4M_NC_C 0
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_EXTEND_4M_MC_Z
|
||||
#define BLIS_EXTEND_4M_MC_Z 0
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_4M_KC_Z
|
||||
#define BLIS_EXTEND_4M_KC_Z 0
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_4M_NC_Z
|
||||
#define BLIS_EXTEND_4M_NC_Z 0
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// -- Kernel blocksize checks --------------------------------------------------
|
||||
|
||||
// Verify that cache blocksizes are whole multiples of register blocksizes.
|
||||
// Specifically, verify that:
|
||||
// - MC is a whole multiple of MR *AND* NR.
|
||||
// - NC is a whole multiple of NR *AND* MR.
|
||||
// - KC is a whole multiple of KR *AND* of both MR and NR.
|
||||
// These constraints are enforced because it makes it easier to handle diagonals
|
||||
// in the macro-kernel implementations.
|
||||
|
||||
//
|
||||
// MC must be a whole multiple of MR and NR.
|
||||
//
|
||||
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_4M_MC_C % BLIS_DEFAULT_4M_MR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_4M_MC_Z % BLIS_DEFAULT_4M_MR_Z != 0 ) \
|
||||
)
|
||||
#error "MC (4m) must be multiple of MR for all datatypes."
|
||||
#endif
|
||||
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_4M_MC_C % BLIS_DEFAULT_4M_NR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_4M_MC_Z % BLIS_DEFAULT_4M_NR_Z != 0 ) \
|
||||
)
|
||||
#error "MC (4m) must be multiple of NR for all datatypes."
|
||||
#endif
|
||||
|
||||
//
|
||||
// NC must be a whole multiple of NR and MR.
|
||||
//
|
||||
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_4M_NC_C % BLIS_DEFAULT_4M_NR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_4M_NC_Z % BLIS_DEFAULT_4M_NR_Z != 0 ) \
|
||||
)
|
||||
#error "NC (4m) must be multiple of NR for all datatypes."
|
||||
#endif
|
||||
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_4M_NC_C % BLIS_DEFAULT_4M_MR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_4M_NC_Z % BLIS_DEFAULT_4M_MR_Z != 0 ) \
|
||||
)
|
||||
#error "NC (4m) must be multiple of MR for all datatypes."
|
||||
#endif
|
||||
|
||||
//
|
||||
// KC must be a whole multiple of KR, MR, and NR.
|
||||
//
|
||||
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_4M_KC_C % BLIS_DEFAULT_4M_KR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_4M_KC_Z % BLIS_DEFAULT_4M_KR_Z != 0 ) \
|
||||
)
|
||||
#error "KC (4m) must be multiple of KR for all datatypes."
|
||||
#endif
|
||||
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_4M_KC_C % BLIS_DEFAULT_4M_MR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_4M_KC_Z % BLIS_DEFAULT_4M_MR_Z != 0 ) \
|
||||
)
|
||||
#error "KC (4m) must be multiple of MR for all datatypes."
|
||||
#endif
|
||||
|
||||
#if ( \
|
||||
( BLIS_DEFAULT_4M_KC_C % BLIS_DEFAULT_4M_NR_C != 0 ) || \
|
||||
( BLIS_DEFAULT_4M_KC_Z % BLIS_DEFAULT_4M_NR_Z != 0 ) \
|
||||
)
|
||||
#error "KC (4m) must be multiple of NR for all datatypes."
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// -- Compute extended blocksizes ----------------------------------------------
|
||||
|
||||
//
|
||||
// Compute maximum cache blocksizes.
|
||||
//
|
||||
|
||||
#define BLIS_MAXIMUM_4M_MC_C ( BLIS_DEFAULT_4M_MC_C + BLIS_EXTEND_4M_MC_C )
|
||||
#define BLIS_MAXIMUM_4M_KC_C ( BLIS_DEFAULT_4M_KC_C + BLIS_EXTEND_4M_KC_C )
|
||||
#define BLIS_MAXIMUM_4M_NC_C ( BLIS_DEFAULT_4M_NC_C + BLIS_EXTEND_4M_NC_C )
|
||||
|
||||
#define BLIS_MAXIMUM_4M_MC_Z ( BLIS_DEFAULT_4M_MC_Z + BLIS_EXTEND_4M_MC_Z )
|
||||
#define BLIS_MAXIMUM_4M_KC_Z ( BLIS_DEFAULT_4M_KC_Z + BLIS_EXTEND_4M_KC_Z )
|
||||
#define BLIS_MAXIMUM_4M_NC_Z ( BLIS_DEFAULT_4M_NC_Z + BLIS_EXTEND_4M_NC_Z )
|
||||
|
||||
//
|
||||
// Compute leading dimension blocksizes used when packing micro-panels.
|
||||
//
|
||||
|
||||
#define BLIS_PACKDIM_4M_MR_C ( BLIS_DEFAULT_4M_MR_C + BLIS_EXTEND_4M_MR_C )
|
||||
#define BLIS_PACKDIM_4M_KR_C ( BLIS_DEFAULT_4M_KR_C + BLIS_EXTEND_4M_KR_C )
|
||||
#define BLIS_PACKDIM_4M_NR_C ( BLIS_DEFAULT_4M_NR_C + BLIS_EXTEND_4M_NR_C )
|
||||
|
||||
#define BLIS_PACKDIM_4M_MR_Z ( BLIS_DEFAULT_4M_MR_Z + BLIS_EXTEND_4M_MR_Z )
|
||||
#define BLIS_PACKDIM_4M_KR_Z ( BLIS_DEFAULT_4M_KR_Z + BLIS_EXTEND_4M_KR_Z )
|
||||
#define BLIS_PACKDIM_4M_NR_Z ( BLIS_DEFAULT_4M_NR_Z + BLIS_EXTEND_4M_NR_Z )
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -35,6 +35,238 @@
|
||||
#ifndef BLIS_KERNEL_POST_MACRO_DEFS_H
|
||||
#define BLIS_KERNEL_POST_MACRO_DEFS_H
|
||||
|
||||
/*
|
||||
// -- Define PASTEMAC-friendly kernel function name macros ---------------------
|
||||
|
||||
//
|
||||
// Level-3
|
||||
//
|
||||
|
||||
// gemm micro-kernels
|
||||
|
||||
#define bli_sGEMM_UKERNEL BLIS_SGEMM_UKERNEL
|
||||
#define bli_dGEMM_UKERNEL BLIS_DGEMM_UKERNEL
|
||||
#define bli_cGEMM_UKERNEL BLIS_CGEMM_UKERNEL
|
||||
#define bli_zGEMM_UKERNEL BLIS_ZGEMM_UKERNEL
|
||||
|
||||
// gemmtrsm_l micro-kernels
|
||||
|
||||
#define bli_sGEMMTRSM_L_UKERNEL BLIS_SGEMMTRSM_L_UKERNEL
|
||||
#define bli_dGEMMTRSM_L_UKERNEL BLIS_DGEMMTRSM_L_UKERNEL
|
||||
#define bli_cGEMMTRSM_L_UKERNEL BLIS_CGEMMTRSM_L_UKERNEL
|
||||
#define bli_zGEMMTRSM_L_UKERNEL BLIS_ZGEMMTRSM_L_UKERNEL
|
||||
|
||||
// gemmtrsm_u micro-kernels
|
||||
|
||||
#define bli_sGEMMTRSM_U_UKERNEL BLIS_SGEMMTRSM_U_UKERNEL
|
||||
#define bli_dGEMMTRSM_U_UKERNEL BLIS_DGEMMTRSM_U_UKERNEL
|
||||
#define bli_cGEMMTRSM_U_UKERNEL BLIS_CGEMMTRSM_U_UKERNEL
|
||||
#define bli_zGEMMTRSM_U_UKERNEL BLIS_ZGEMMTRSM_U_UKERNEL
|
||||
|
||||
// trsm_l micro-kernels
|
||||
|
||||
#define bli_sTRSM_L_UKERNEL BLIS_STRSM_L_UKERNEL
|
||||
#define bli_dTRSM_L_UKERNEL BLIS_DTRSM_L_UKERNEL
|
||||
#define bli_cTRSM_L_UKERNEL BLIS_CTRSM_L_UKERNEL
|
||||
#define bli_zTRSM_L_UKERNEL BLIS_ZTRSM_L_UKERNEL
|
||||
|
||||
// trsm_u micro-kernels
|
||||
|
||||
#define bli_sTRSM_U_UKERNEL BLIS_STRSM_U_UKERNEL
|
||||
#define bli_dTRSM_U_UKERNEL BLIS_DTRSM_U_UKERNEL
|
||||
#define bli_cTRSM_U_UKERNEL BLIS_CTRSM_U_UKERNEL
|
||||
#define bli_zTRSM_U_UKERNEL BLIS_ZTRSM_U_UKERNEL
|
||||
|
||||
//
|
||||
// Level-3 4m
|
||||
//
|
||||
|
||||
// gemm4m micro-kernels
|
||||
|
||||
#define bli_cGEMM4M_UKERNEL BLIS_CGEMM4M_UKERNEL
|
||||
#define bli_zGEMM4M_UKERNEL BLIS_ZGEMM4M_UKERNEL
|
||||
|
||||
// gemmtrsm4m_l micro-kernels
|
||||
|
||||
#define bli_cGEMMTRSM4M_L_UKERNEL BLIS_CGEMMTRSM4M_L_UKERNEL
|
||||
#define bli_zGEMMTRSM4M_L_UKERNEL BLIS_ZGEMMTRSM4M_L_UKERNEL
|
||||
|
||||
// gemmtrsm4m_u micro-kernels
|
||||
|
||||
#define bli_cGEMMTRSM4M_U_UKERNEL BLIS_CGEMMTRSM4M_U_UKERNEL
|
||||
#define bli_zGEMMTRSM4M_U_UKERNEL BLIS_ZGEMMTRSM4M_U_UKERNEL
|
||||
|
||||
// trsm4m_l micro-kernels
|
||||
|
||||
#define bli_cTRSM4M_L_UKERNEL BLIS_CTRSM4M_L_UKERNEL
|
||||
#define bli_zTRSM4M_L_UKERNEL BLIS_ZTRSM4M_L_UKERNEL
|
||||
|
||||
// trsm4m_u micro-kernels
|
||||
|
||||
#define bli_cTRSM4M_U_UKERNEL BLIS_CTRSM4M_U_UKERNEL
|
||||
#define bli_zTRSM4M_U_UKERNEL BLIS_ZTRSM4M_U_UKERNEL
|
||||
|
||||
//
|
||||
// Level-3 3m
|
||||
//
|
||||
|
||||
// gemm3m micro-kernels
|
||||
|
||||
#define bli_cGEMM3M_UKERNEL BLIS_CGEMM3M_UKERNEL
|
||||
#define bli_zGEMM3M_UKERNEL BLIS_ZGEMM3M_UKERNEL
|
||||
|
||||
// gemmtrsm3m_l micro-kernels
|
||||
|
||||
#define bli_cGEMMTRSM3M_L_UKERNEL BLIS_CGEMMTRSM3M_L_UKERNEL
|
||||
#define bli_zGEMMTRSM3M_L_UKERNEL BLIS_ZGEMMTRSM3M_L_UKERNEL
|
||||
|
||||
// gemmtrsm3m_u micro-kernels
|
||||
|
||||
#define bli_cGEMMTRSM3M_U_UKERNEL BLIS_CGEMMTRSM3M_U_UKERNEL
|
||||
#define bli_zGEMMTRSM3M_U_UKERNEL BLIS_ZGEMMTRSM3M_U_UKERNEL
|
||||
|
||||
// trsm3m_l micro-kernels
|
||||
|
||||
#define bli_cTRSM3M_L_UKERNEL BLIS_CTRSM3M_L_UKERNEL
|
||||
#define bli_zTRSM3M_L_UKERNEL BLIS_ZTRSM3M_L_UKERNEL
|
||||
|
||||
// trsm3m_u micro-kernels
|
||||
|
||||
#define bli_cTRSM3M_U_UKERNEL BLIS_CTRSM3M_U_UKERNEL
|
||||
#define bli_zTRSM3M_U_UKERNEL BLIS_ZTRSM3M_U_UKERNEL
|
||||
|
||||
//
|
||||
// Level-1m
|
||||
//
|
||||
|
||||
// NOTE: We don't need any PASTEMAC-friendly aliases to packm kernel
|
||||
// macros because they are used directly in the initialization of the
|
||||
// function pointer array, rather than via a templatizing wrapper macro.
|
||||
|
||||
|
||||
//
|
||||
// Level-1f
|
||||
//
|
||||
|
||||
// axpy2v kernels
|
||||
|
||||
#define bli_sssAXPY2V_KERNEL BLIS_SAXPY2V_KERNEL
|
||||
#define bli_dddAXPY2V_KERNEL BLIS_DAXPY2V_KERNEL
|
||||
#define bli_cccAXPY2V_KERNEL BLIS_CAXPY2V_KERNEL
|
||||
#define bli_zzzAXPY2V_KERNEL BLIS_ZAXPY2V_KERNEL
|
||||
|
||||
// dotaxpyv kernels
|
||||
|
||||
#define bli_sssDOTAXPYV_KERNEL BLIS_SDOTAXPYV_KERNEL
|
||||
#define bli_dddDOTAXPYV_KERNEL BLIS_DDOTAXPYV_KERNEL
|
||||
#define bli_cccDOTAXPYV_KERNEL BLIS_CDOTAXPYV_KERNEL
|
||||
#define bli_zzzDOTAXPYV_KERNEL BLIS_ZDOTAXPYV_KERNEL
|
||||
|
||||
// axpyf kernels
|
||||
|
||||
#define bli_sssAXPYF_KERNEL BLIS_SAXPYF_KERNEL
|
||||
#define bli_dddAXPYF_KERNEL BLIS_DAXPYF_KERNEL
|
||||
#define bli_cccAXPYF_KERNEL BLIS_CAXPYF_KERNEL
|
||||
#define bli_zzzAXPYF_KERNEL BLIS_ZAXPYF_KERNEL
|
||||
|
||||
// dotxf kernels
|
||||
|
||||
#define bli_sssDOTXF_KERNEL BLIS_SDOTXF_KERNEL
|
||||
#define bli_dddDOTXF_KERNEL BLIS_DDOTXF_KERNEL
|
||||
#define bli_cccDOTXF_KERNEL BLIS_CDOTXF_KERNEL
|
||||
#define bli_zzzDOTXF_KERNEL BLIS_ZDOTXF_KERNEL
|
||||
|
||||
// dotxaxpyf kernels
|
||||
|
||||
#define bli_sssDOTXAXPYF_KERNEL BLIS_SDOTXAXPYF_KERNEL
|
||||
#define bli_dddDOTXAXPYF_KERNEL BLIS_DDOTXAXPYF_KERNEL
|
||||
#define bli_cccDOTXAXPYF_KERNEL BLIS_CDOTXAXPYF_KERNEL
|
||||
#define bli_zzzDOTXAXPYF_KERNEL BLIS_ZDOTXAXPYF_KERNEL
|
||||
|
||||
|
||||
//
|
||||
// Level-1v
|
||||
//
|
||||
|
||||
// addv kernels
|
||||
|
||||
#define bli_ssADDV_KERNEL BLIS_SADDV_KERNEL
|
||||
#define bli_ddADDV_KERNEL BLIS_DADDV_KERNEL
|
||||
#define bli_ccADDV_KERNEL BLIS_CADDV_KERNEL
|
||||
#define bli_zzADDV_KERNEL BLIS_ZADDV_KERNEL
|
||||
|
||||
// axpyv kernels
|
||||
|
||||
#define bli_sssAXPYV_KERNEL BLIS_SAXPYV_KERNEL
|
||||
#define bli_dddAXPYV_KERNEL BLIS_DAXPYV_KERNEL
|
||||
#define bli_cccAXPYV_KERNEL BLIS_CAXPYV_KERNEL
|
||||
#define bli_zzzAXPYV_KERNEL BLIS_ZAXPYV_KERNEL
|
||||
|
||||
// copyv kernels
|
||||
|
||||
#define bli_ssCOPYV_KERNEL BLIS_SCOPYV_KERNEL
|
||||
#define bli_ddCOPYV_KERNEL BLIS_DCOPYV_KERNEL
|
||||
#define bli_ccCOPYV_KERNEL BLIS_CCOPYV_KERNEL
|
||||
#define bli_zzCOPYV_KERNEL BLIS_ZCOPYV_KERNEL
|
||||
|
||||
// dotv kernels
|
||||
|
||||
#define bli_sssDOTV_KERNEL BLIS_SDOTV_KERNEL
|
||||
#define bli_dddDOTV_KERNEL BLIS_DDOTV_KERNEL
|
||||
#define bli_cccDOTV_KERNEL BLIS_CDOTV_KERNEL
|
||||
#define bli_zzzDOTV_KERNEL BLIS_ZDOTV_KERNEL
|
||||
|
||||
// dotxv kernels
|
||||
|
||||
#define bli_sssDOTXV_KERNEL BLIS_SDOTXV_KERNEL
|
||||
#define bli_dddDOTXV_KERNEL BLIS_DDOTXV_KERNEL
|
||||
#define bli_cccDOTXV_KERNEL BLIS_CDOTXV_KERNEL
|
||||
#define bli_zzzDOTXV_KERNEL BLIS_ZDOTXV_KERNEL
|
||||
|
||||
// invertv kernels
|
||||
|
||||
#define bli_sINVERTV_KERNEL BLIS_SINVERTV_KERNEL
|
||||
#define bli_dINVERTV_KERNEL BLIS_DINVERTV_KERNEL
|
||||
#define bli_cINVERTV_KERNEL BLIS_CINVERTV_KERNEL
|
||||
#define bli_zINVERTV_KERNEL BLIS_ZINVERTV_KERNEL
|
||||
|
||||
// scal2v kernels
|
||||
|
||||
#define bli_sssSCAL2V_KERNEL BLIS_SSCAL2V_KERNEL
|
||||
#define bli_dddSCAL2V_KERNEL BLIS_DSCAL2V_KERNEL
|
||||
#define bli_cccSCAL2V_KERNEL BLIS_CSCAL2V_KERNEL
|
||||
#define bli_zzzSCAL2V_KERNEL BLIS_ZSCAL2V_KERNEL
|
||||
|
||||
// scalv kernels
|
||||
|
||||
#define bli_ssSCALV_KERNEL BLIS_SSCALV_KERNEL
|
||||
#define bli_ddSCALV_KERNEL BLIS_DSCALV_KERNEL
|
||||
#define bli_ccSCALV_KERNEL BLIS_CSCALV_KERNEL
|
||||
#define bli_zzSCALV_KERNEL BLIS_ZSCALV_KERNEL
|
||||
|
||||
// setv kernels
|
||||
|
||||
#define bli_ssSETV_KERNEL BLIS_SSETV_KERNEL
|
||||
#define bli_ddSETV_KERNEL BLIS_DSETV_KERNEL
|
||||
#define bli_ccSETV_KERNEL BLIS_CSETV_KERNEL
|
||||
#define bli_zzSETV_KERNEL BLIS_ZSETV_KERNEL
|
||||
|
||||
// subv kernels
|
||||
|
||||
#define bli_ssSUBV_KERNEL BLIS_SSUBV_KERNEL
|
||||
#define bli_ddSUBV_KERNEL BLIS_DSUBV_KERNEL
|
||||
#define bli_ccSUBV_KERNEL BLIS_CSUBV_KERNEL
|
||||
#define bli_zzSUBV_KERNEL BLIS_ZSUBV_KERNEL
|
||||
|
||||
// swapv kernels
|
||||
|
||||
#define bli_ssSWAPV_KERNEL BLIS_SSWAPV_KERNEL
|
||||
#define bli_ddSWAPV_KERNEL BLIS_DSWAPV_KERNEL
|
||||
#define bli_ccSWAPV_KERNEL BLIS_CSWAPV_KERNEL
|
||||
#define bli_zzSWAPV_KERNEL BLIS_ZSWAPV_KERNEL
|
||||
*/
|
||||
|
||||
|
||||
// -- Maximum register blocksize search ----------------------------------------
|
||||
|
||||
//
|
||||
@@ -43,13 +275,47 @@
|
||||
|
||||
#define BLIS_MAX_DEFAULT_MR_S BLIS_DEFAULT_MR_S
|
||||
#define BLIS_MAX_DEFAULT_MR_D BLIS_DEFAULT_MR_D
|
||||
|
||||
// NOTE: 4m and 3m register blocksizes are assumed to be equal. Thus,
|
||||
// we only inspect the 4m values.
|
||||
|
||||
// c: Choose between the regular and 4m/3m blocksize.
|
||||
// Seed the maximum with the regular MR for datatype c, then replace it with
// the 4m MR only when the 4m value is strictly larger.
#define BLIS_MAX_DEFAULT_MR_C BLIS_DEFAULT_MR_C
|
||||
#if BLIS_DEFAULT_4M_MR_C > BLIS_MAX_DEFAULT_MR_C
|
||||
#undef BLIS_MAX_DEFAULT_MR_C
|
||||
#define BLIS_MAX_DEFAULT_MR_C BLIS_DEFAULT_4M_MR_C
|
||||
#endif
|
||||
|
||||
// z: Choose between the regular and 4m/3m blocksize.
|
||||
// Seed the maximum with the regular MR for datatype z, then replace it with
// the 4m MR only when the 4m value is strictly larger.
#define BLIS_MAX_DEFAULT_MR_Z BLIS_DEFAULT_MR_Z
|
||||
#if BLIS_DEFAULT_4M_MR_Z > BLIS_MAX_DEFAULT_MR_Z
|
||||
#undef BLIS_MAX_DEFAULT_MR_Z
|
||||
#define BLIS_MAX_DEFAULT_MR_Z BLIS_DEFAULT_4M_MR_Z
|
||||
#endif
|
||||
|
||||
//
|
||||
// Find the largest register blocksize NR.
|
||||
//
|
||||
|
||||
#define BLIS_MAX_DEFAULT_NR_S BLIS_DEFAULT_NR_S
|
||||
#define BLIS_MAX_DEFAULT_NR_D BLIS_DEFAULT_NR_D
|
||||
|
||||
// NOTE: 4m and 3m register blocksizes are assumed to be equal. Thus,
|
||||
// we only inspect the 4m values.
|
||||
|
||||
// c: Choose between the regular and 4m/3m blocksize.
|
||||
// Seed the maximum with the regular NR for datatype c, then replace it with
// the 4m NR only when the 4m value is strictly larger.
#define BLIS_MAX_DEFAULT_NR_C BLIS_DEFAULT_NR_C
|
||||
#if BLIS_DEFAULT_4M_NR_C > BLIS_MAX_DEFAULT_NR_C
|
||||
#undef BLIS_MAX_DEFAULT_NR_C
|
||||
#define BLIS_MAX_DEFAULT_NR_C BLIS_DEFAULT_4M_NR_C
|
||||
#endif
|
||||
|
||||
// z: Choose between the regular and 4m/3m blocksize.
|
||||
// Seed the maximum with the regular NR for datatype z, then replace it with
// the 4m NR only when the 4m value is strictly larger.
#define BLIS_MAX_DEFAULT_NR_Z BLIS_DEFAULT_NR_Z
|
||||
#if BLIS_DEFAULT_4M_NR_Z > BLIS_MAX_DEFAULT_NR_Z
|
||||
#undef BLIS_MAX_DEFAULT_NR_Z
|
||||
#define BLIS_MAX_DEFAULT_NR_Z BLIS_DEFAULT_4M_NR_Z
|
||||
#endif
|
||||
|
||||
|
||||
// -- Abbreviated macros -------------------------------------------------------
|
||||
|
||||
492
frame/include/bli_kernel_pre_macro_defs.h
Normal file
492
frame/include/bli_kernel_pre_macro_defs.h
Normal file
@@ -0,0 +1,492 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_KERNEL_PRE_MACRO_DEFS_H
|
||||
#define BLIS_KERNEL_PRE_MACRO_DEFS_H
|
||||
|
||||
// -- Reference kernel definitions ---------------------------------------------
|
||||
|
||||
//
|
||||
// Level-3
|
||||
//
|
||||
|
||||
// gemm micro-kernels
|
||||
|
||||
#define BLIS_SGEMM_UKERNEL_REF bli_sgemm_ukr_ref
|
||||
#define BLIS_DGEMM_UKERNEL_REF bli_dgemm_ukr_ref
|
||||
#define BLIS_CGEMM_UKERNEL_REF bli_cgemm_ukr_ref
|
||||
#define BLIS_ZGEMM_UKERNEL_REF bli_zgemm_ukr_ref
|
||||
|
||||
// gemmtrsm_l micro-kernels
|
||||
|
||||
#define BLIS_SGEMMTRSM_L_UKERNEL_REF bli_sgemmtrsm_l_ukr_ref
|
||||
#define BLIS_DGEMMTRSM_L_UKERNEL_REF bli_dgemmtrsm_l_ukr_ref
|
||||
#define BLIS_CGEMMTRSM_L_UKERNEL_REF bli_cgemmtrsm_l_ukr_ref
|
||||
#define BLIS_ZGEMMTRSM_L_UKERNEL_REF bli_zgemmtrsm_l_ukr_ref
|
||||
|
||||
// gemmtrsm_u micro-kernels
|
||||
|
||||
#define BLIS_SGEMMTRSM_U_UKERNEL_REF bli_sgemmtrsm_u_ukr_ref
|
||||
#define BLIS_DGEMMTRSM_U_UKERNEL_REF bli_dgemmtrsm_u_ukr_ref
|
||||
#define BLIS_CGEMMTRSM_U_UKERNEL_REF bli_cgemmtrsm_u_ukr_ref
|
||||
#define BLIS_ZGEMMTRSM_U_UKERNEL_REF bli_zgemmtrsm_u_ukr_ref
|
||||
|
||||
// trsm_l micro-kernels
|
||||
|
||||
#define BLIS_STRSM_L_UKERNEL_REF bli_strsm_l_ukr_ref
|
||||
#define BLIS_DTRSM_L_UKERNEL_REF bli_dtrsm_l_ukr_ref
|
||||
#define BLIS_CTRSM_L_UKERNEL_REF bli_ctrsm_l_ukr_ref
|
||||
#define BLIS_ZTRSM_L_UKERNEL_REF bli_ztrsm_l_ukr_ref
|
||||
|
||||
// trsm_u micro-kernels
|
||||
|
||||
#define BLIS_STRSM_U_UKERNEL_REF bli_strsm_u_ukr_ref
|
||||
#define BLIS_DTRSM_U_UKERNEL_REF bli_dtrsm_u_ukr_ref
|
||||
#define BLIS_CTRSM_U_UKERNEL_REF bli_ctrsm_u_ukr_ref
|
||||
#define BLIS_ZTRSM_U_UKERNEL_REF bli_ztrsm_u_ukr_ref
|
||||
|
||||
//
|
||||
// Level-3 4m
|
||||
//
|
||||
|
||||
// gemm4m micro-kernels
|
||||
|
||||
#define BLIS_CGEMM4M_UKERNEL_REF bli_cgemm4m_ukr_ref
|
||||
#define BLIS_ZGEMM4M_UKERNEL_REF bli_zgemm4m_ukr_ref
|
||||
|
||||
// gemmtrsm4m_l micro-kernels
|
||||
|
||||
#define BLIS_CGEMMTRSM4M_L_UKERNEL_REF bli_cgemmtrsm4m_l_ukr_ref
|
||||
#define BLIS_ZGEMMTRSM4M_L_UKERNEL_REF bli_zgemmtrsm4m_l_ukr_ref
|
||||
|
||||
// gemmtrsm4m_u micro-kernels
|
||||
|
||||
#define BLIS_CGEMMTRSM4M_U_UKERNEL_REF bli_cgemmtrsm4m_u_ukr_ref
|
||||
#define BLIS_ZGEMMTRSM4M_U_UKERNEL_REF bli_zgemmtrsm4m_u_ukr_ref
|
||||
|
||||
// trsm4m_l micro-kernels
|
||||
|
||||
#define BLIS_CTRSM4M_L_UKERNEL_REF bli_ctrsm4m_l_ukr_ref
|
||||
#define BLIS_ZTRSM4M_L_UKERNEL_REF bli_ztrsm4m_l_ukr_ref
|
||||
|
||||
// trsm4m_u micro-kernels
|
||||
|
||||
#define BLIS_CTRSM4M_U_UKERNEL_REF bli_ctrsm4m_u_ukr_ref
|
||||
#define BLIS_ZTRSM4M_U_UKERNEL_REF bli_ztrsm4m_u_ukr_ref
|
||||
|
||||
//
|
||||
// Level-3 3m
|
||||
//
|
||||
|
||||
// gemm3m micro-kernels
|
||||
|
||||
#define BLIS_CGEMM3M_UKERNEL_REF bli_cgemm3m_ukr_ref
|
||||
#define BLIS_ZGEMM3M_UKERNEL_REF bli_zgemm3m_ukr_ref
|
||||
|
||||
// gemmtrsm3m_l micro-kernels
|
||||
|
||||
#define BLIS_CGEMMTRSM3M_L_UKERNEL_REF bli_cgemmtrsm3m_l_ukr_ref
|
||||
#define BLIS_ZGEMMTRSM3M_L_UKERNEL_REF bli_zgemmtrsm3m_l_ukr_ref
|
||||
|
||||
// gemmtrsm3m_u micro-kernels
|
||||
|
||||
#define BLIS_CGEMMTRSM3M_U_UKERNEL_REF bli_cgemmtrsm3m_u_ukr_ref
|
||||
#define BLIS_ZGEMMTRSM3M_U_UKERNEL_REF bli_zgemmtrsm3m_u_ukr_ref
|
||||
|
||||
// trsm3m_l micro-kernels
|
||||
|
||||
#define BLIS_CTRSM3M_L_UKERNEL_REF bli_ctrsm3m_l_ukr_ref
|
||||
#define BLIS_ZTRSM3M_L_UKERNEL_REF bli_ztrsm3m_l_ukr_ref
|
||||
|
||||
// trsm3m_u micro-kernels
|
||||
|
||||
#define BLIS_CTRSM3M_U_UKERNEL_REF bli_ctrsm3m_u_ukr_ref
|
||||
#define BLIS_ZTRSM3M_U_UKERNEL_REF bli_ztrsm3m_u_ukr_ref
|
||||
|
||||
//
|
||||
// Level-1m
|
||||
//
|
||||
|
||||
// packm_2xk kernels
|
||||
|
||||
#define BLIS_SPACKM_2XK_KERNEL_REF bli_spackm_ref_2xk
|
||||
#define BLIS_DPACKM_2XK_KERNEL_REF bli_dpackm_ref_2xk
|
||||
#define BLIS_CPACKM_2XK_KERNEL_REF bli_cpackm_ref_2xk
|
||||
#define BLIS_ZPACKM_2XK_KERNEL_REF bli_zpackm_ref_2xk
|
||||
|
||||
// packm_4xk kernels
|
||||
|
||||
#define BLIS_SPACKM_4XK_KERNEL_REF bli_spackm_ref_4xk
|
||||
#define BLIS_DPACKM_4XK_KERNEL_REF bli_dpackm_ref_4xk
|
||||
#define BLIS_CPACKM_4XK_KERNEL_REF bli_cpackm_ref_4xk
|
||||
#define BLIS_ZPACKM_4XK_KERNEL_REF bli_zpackm_ref_4xk
|
||||
|
||||
// packm_6xk kernels
|
||||
|
||||
#define BLIS_SPACKM_6XK_KERNEL_REF bli_spackm_ref_6xk
|
||||
#define BLIS_DPACKM_6XK_KERNEL_REF bli_dpackm_ref_6xk
|
||||
#define BLIS_CPACKM_6XK_KERNEL_REF bli_cpackm_ref_6xk
|
||||
#define BLIS_ZPACKM_6XK_KERNEL_REF bli_zpackm_ref_6xk
|
||||
|
||||
// packm_8xk kernels
|
||||
|
||||
#define BLIS_SPACKM_8XK_KERNEL_REF bli_spackm_ref_8xk
|
||||
#define BLIS_DPACKM_8XK_KERNEL_REF bli_dpackm_ref_8xk
|
||||
#define BLIS_CPACKM_8XK_KERNEL_REF bli_cpackm_ref_8xk
|
||||
#define BLIS_ZPACKM_8XK_KERNEL_REF bli_zpackm_ref_8xk
|
||||
|
||||
// packm_10xk kernels
|
||||
|
||||
#define BLIS_SPACKM_10XK_KERNEL_REF bli_spackm_ref_10xk
|
||||
#define BLIS_DPACKM_10XK_KERNEL_REF bli_dpackm_ref_10xk
|
||||
#define BLIS_CPACKM_10XK_KERNEL_REF bli_cpackm_ref_10xk
|
||||
#define BLIS_ZPACKM_10XK_KERNEL_REF bli_zpackm_ref_10xk
|
||||
|
||||
// packm_12xk kernels
|
||||
|
||||
#define BLIS_SPACKM_12XK_KERNEL_REF bli_spackm_ref_12xk
|
||||
#define BLIS_DPACKM_12XK_KERNEL_REF bli_dpackm_ref_12xk
|
||||
#define BLIS_CPACKM_12XK_KERNEL_REF bli_cpackm_ref_12xk
|
||||
#define BLIS_ZPACKM_12XK_KERNEL_REF bli_zpackm_ref_12xk
|
||||
|
||||
// packm_14xk kernels
|
||||
|
||||
#define BLIS_SPACKM_14XK_KERNEL_REF bli_spackm_ref_14xk
|
||||
#define BLIS_DPACKM_14XK_KERNEL_REF bli_dpackm_ref_14xk
|
||||
#define BLIS_CPACKM_14XK_KERNEL_REF bli_cpackm_ref_14xk
|
||||
#define BLIS_ZPACKM_14XK_KERNEL_REF bli_zpackm_ref_14xk
|
||||
|
||||
// packm_16xk kernels
|
||||
|
||||
#define BLIS_SPACKM_16XK_KERNEL_REF bli_spackm_ref_16xk
|
||||
#define BLIS_DPACKM_16XK_KERNEL_REF bli_dpackm_ref_16xk
|
||||
#define BLIS_CPACKM_16XK_KERNEL_REF bli_cpackm_ref_16xk
|
||||
#define BLIS_ZPACKM_16XK_KERNEL_REF bli_zpackm_ref_16xk
|
||||
|
||||
// packm_2xk_ri kernels
|
||||
|
||||
#define BLIS_SPACKM_2XK_RI_KERNEL_REF bli_spackm_ref_2xk_ri
|
||||
#define BLIS_DPACKM_2XK_RI_KERNEL_REF bli_dpackm_ref_2xk_ri
|
||||
#define BLIS_CPACKM_2XK_RI_KERNEL_REF bli_cpackm_ref_2xk_ri
|
||||
#define BLIS_ZPACKM_2XK_RI_KERNEL_REF bli_zpackm_ref_2xk_ri
|
||||
|
||||
// packm_4xk_ri kernels
|
||||
|
||||
#define BLIS_SPACKM_4XK_RI_KERNEL_REF bli_spackm_ref_4xk_ri
|
||||
#define BLIS_DPACKM_4XK_RI_KERNEL_REF bli_dpackm_ref_4xk_ri
|
||||
#define BLIS_CPACKM_4XK_RI_KERNEL_REF bli_cpackm_ref_4xk_ri
|
||||
#define BLIS_ZPACKM_4XK_RI_KERNEL_REF bli_zpackm_ref_4xk_ri
|
||||
|
||||
// packm_6xk_ri kernels
|
||||
|
||||
#define BLIS_SPACKM_6XK_RI_KERNEL_REF bli_spackm_ref_6xk_ri
|
||||
#define BLIS_DPACKM_6XK_RI_KERNEL_REF bli_dpackm_ref_6xk_ri
|
||||
#define BLIS_CPACKM_6XK_RI_KERNEL_REF bli_cpackm_ref_6xk_ri
|
||||
#define BLIS_ZPACKM_6XK_RI_KERNEL_REF bli_zpackm_ref_6xk_ri
|
||||
|
||||
// packm_8xk_ri kernels
|
||||
|
||||
#define BLIS_SPACKM_8XK_RI_KERNEL_REF bli_spackm_ref_8xk_ri
|
||||
#define BLIS_DPACKM_8XK_RI_KERNEL_REF bli_dpackm_ref_8xk_ri
|
||||
#define BLIS_CPACKM_8XK_RI_KERNEL_REF bli_cpackm_ref_8xk_ri
|
||||
#define BLIS_ZPACKM_8XK_RI_KERNEL_REF bli_zpackm_ref_8xk_ri
|
||||
|
||||
// packm_10xk_ri kernels
|
||||
|
||||
#define BLIS_SPACKM_10XK_RI_KERNEL_REF bli_spackm_ref_10xk_ri
|
||||
#define BLIS_DPACKM_10XK_RI_KERNEL_REF bli_dpackm_ref_10xk_ri
|
||||
#define BLIS_CPACKM_10XK_RI_KERNEL_REF bli_cpackm_ref_10xk_ri
|
||||
#define BLIS_ZPACKM_10XK_RI_KERNEL_REF bli_zpackm_ref_10xk_ri
|
||||
|
||||
// packm_12xk_ri kernels
|
||||
|
||||
#define BLIS_SPACKM_12XK_RI_KERNEL_REF bli_spackm_ref_12xk_ri
|
||||
#define BLIS_DPACKM_12XK_RI_KERNEL_REF bli_dpackm_ref_12xk_ri
|
||||
#define BLIS_CPACKM_12XK_RI_KERNEL_REF bli_cpackm_ref_12xk_ri
|
||||
#define BLIS_ZPACKM_12XK_RI_KERNEL_REF bli_zpackm_ref_12xk_ri
|
||||
|
||||
// packm_14xk_ri kernels
|
||||
|
||||
#define BLIS_SPACKM_14XK_RI_KERNEL_REF bli_spackm_ref_14xk_ri
|
||||
#define BLIS_DPACKM_14XK_RI_KERNEL_REF bli_dpackm_ref_14xk_ri
|
||||
#define BLIS_CPACKM_14XK_RI_KERNEL_REF bli_cpackm_ref_14xk_ri
|
||||
#define BLIS_ZPACKM_14XK_RI_KERNEL_REF bli_zpackm_ref_14xk_ri
|
||||
|
||||
// packm_16xk_ri kernels
|
||||
|
||||
#define BLIS_SPACKM_16XK_RI_KERNEL_REF bli_spackm_ref_16xk_ri
|
||||
#define BLIS_DPACKM_16XK_RI_KERNEL_REF bli_dpackm_ref_16xk_ri
|
||||
#define BLIS_CPACKM_16XK_RI_KERNEL_REF bli_cpackm_ref_16xk_ri
|
||||
#define BLIS_ZPACKM_16XK_RI_KERNEL_REF bli_zpackm_ref_16xk_ri
|
||||
|
||||
// packm_2xk_ri3 kernels
|
||||
|
||||
#define BLIS_SPACKM_2XK_RI3_KERNEL_REF bli_spackm_ref_2xk_ri3
|
||||
#define BLIS_DPACKM_2XK_RI3_KERNEL_REF bli_dpackm_ref_2xk_ri3
|
||||
#define BLIS_CPACKM_2XK_RI3_KERNEL_REF bli_cpackm_ref_2xk_ri3
|
||||
#define BLIS_ZPACKM_2XK_RI3_KERNEL_REF bli_zpackm_ref_2xk_ri3
|
||||
|
||||
// packm_4xk_ri3 kernels
|
||||
|
||||
#define BLIS_SPACKM_4XK_RI3_KERNEL_REF bli_spackm_ref_4xk_ri3
|
||||
#define BLIS_DPACKM_4XK_RI3_KERNEL_REF bli_dpackm_ref_4xk_ri3
|
||||
#define BLIS_CPACKM_4XK_RI3_KERNEL_REF bli_cpackm_ref_4xk_ri3
|
||||
#define BLIS_ZPACKM_4XK_RI3_KERNEL_REF bli_zpackm_ref_4xk_ri3
|
||||
|
||||
// packm_6xk_ri3 kernels
|
||||
|
||||
#define BLIS_SPACKM_6XK_RI3_KERNEL_REF bli_spackm_ref_6xk_ri3
|
||||
#define BLIS_DPACKM_6XK_RI3_KERNEL_REF bli_dpackm_ref_6xk_ri3
|
||||
#define BLIS_CPACKM_6XK_RI3_KERNEL_REF bli_cpackm_ref_6xk_ri3
|
||||
#define BLIS_ZPACKM_6XK_RI3_KERNEL_REF bli_zpackm_ref_6xk_ri3
|
||||
|
||||
// packm_8xk_ri3 kernels
|
||||
|
||||
#define BLIS_SPACKM_8XK_RI3_KERNEL_REF bli_spackm_ref_8xk_ri3
|
||||
#define BLIS_DPACKM_8XK_RI3_KERNEL_REF bli_dpackm_ref_8xk_ri3
|
||||
#define BLIS_CPACKM_8XK_RI3_KERNEL_REF bli_cpackm_ref_8xk_ri3
|
||||
#define BLIS_ZPACKM_8XK_RI3_KERNEL_REF bli_zpackm_ref_8xk_ri3
|
||||
|
||||
// packm_10xk_ri3 kernels
|
||||
|
||||
#define BLIS_SPACKM_10XK_RI3_KERNEL_REF bli_spackm_ref_10xk_ri3
|
||||
#define BLIS_DPACKM_10XK_RI3_KERNEL_REF bli_dpackm_ref_10xk_ri3
|
||||
#define BLIS_CPACKM_10XK_RI3_KERNEL_REF bli_cpackm_ref_10xk_ri3
|
||||
#define BLIS_ZPACKM_10XK_RI3_KERNEL_REF bli_zpackm_ref_10xk_ri3
|
||||
|
||||
// packm_12xk_ri3 kernels
|
||||
|
||||
#define BLIS_SPACKM_12XK_RI3_KERNEL_REF bli_spackm_ref_12xk_ri3
|
||||
#define BLIS_DPACKM_12XK_RI3_KERNEL_REF bli_dpackm_ref_12xk_ri3
|
||||
#define BLIS_CPACKM_12XK_RI3_KERNEL_REF bli_cpackm_ref_12xk_ri3
|
||||
#define BLIS_ZPACKM_12XK_RI3_KERNEL_REF bli_zpackm_ref_12xk_ri3
|
||||
|
||||
// packm_14xk_ri3 kernels
|
||||
|
||||
#define BLIS_SPACKM_14XK_RI3_KERNEL_REF bli_spackm_ref_14xk_ri3
|
||||
#define BLIS_DPACKM_14XK_RI3_KERNEL_REF bli_dpackm_ref_14xk_ri3
|
||||
#define BLIS_CPACKM_14XK_RI3_KERNEL_REF bli_cpackm_ref_14xk_ri3
|
||||
#define BLIS_ZPACKM_14XK_RI3_KERNEL_REF bli_zpackm_ref_14xk_ri3
|
||||
|
||||
// packm_16xk_ri3 kernels
|
||||
|
||||
#define BLIS_SPACKM_16XK_RI3_KERNEL_REF bli_spackm_ref_16xk_ri3
|
||||
#define BLIS_DPACKM_16XK_RI3_KERNEL_REF bli_dpackm_ref_16xk_ri3
|
||||
#define BLIS_CPACKM_16XK_RI3_KERNEL_REF bli_cpackm_ref_16xk_ri3
|
||||
#define BLIS_ZPACKM_16XK_RI3_KERNEL_REF bli_zpackm_ref_16xk_ri3
|
||||
|
||||
// unpackm_2xk kernels
|
||||
|
||||
#define BLIS_SUNPACKM_2XK_KERNEL_REF bli_sunpackm_ref_2xk
|
||||
#define BLIS_DUNPACKM_2XK_KERNEL_REF bli_dunpackm_ref_2xk
|
||||
#define BLIS_CUNPACKM_2XK_KERNEL_REF bli_cunpackm_ref_2xk
|
||||
#define BLIS_ZUNPACKM_2XK_KERNEL_REF bli_zunpackm_ref_2xk
|
||||
|
||||
// unpackm_4xk kernels
|
||||
|
||||
#define BLIS_SUNPACKM_4XK_KERNEL_REF bli_sunpackm_ref_4xk
|
||||
#define BLIS_DUNPACKM_4XK_KERNEL_REF bli_dunpackm_ref_4xk
|
||||
#define BLIS_CUNPACKM_4XK_KERNEL_REF bli_cunpackm_ref_4xk
|
||||
#define BLIS_ZUNPACKM_4XK_KERNEL_REF bli_zunpackm_ref_4xk
|
||||
|
||||
// unpackm_6xk kernels
|
||||
|
||||
#define BLIS_SUNPACKM_6XK_KERNEL_REF bli_sunpackm_ref_6xk
|
||||
#define BLIS_DUNPACKM_6XK_KERNEL_REF bli_dunpackm_ref_6xk
|
||||
#define BLIS_CUNPACKM_6XK_KERNEL_REF bli_cunpackm_ref_6xk
|
||||
#define BLIS_ZUNPACKM_6XK_KERNEL_REF bli_zunpackm_ref_6xk
|
||||
|
||||
// unpackm_8xk kernels
|
||||
|
||||
#define BLIS_SUNPACKM_8XK_KERNEL_REF bli_sunpackm_ref_8xk
|
||||
#define BLIS_DUNPACKM_8XK_KERNEL_REF bli_dunpackm_ref_8xk
|
||||
#define BLIS_CUNPACKM_8XK_KERNEL_REF bli_cunpackm_ref_8xk
|
||||
#define BLIS_ZUNPACKM_8XK_KERNEL_REF bli_zunpackm_ref_8xk
|
||||
|
||||
// unpackm_10xk kernels
|
||||
|
||||
#define BLIS_SUNPACKM_10XK_KERNEL_REF bli_sunpackm_ref_10xk
|
||||
#define BLIS_DUNPACKM_10XK_KERNEL_REF bli_dunpackm_ref_10xk
|
||||
#define BLIS_CUNPACKM_10XK_KERNEL_REF bli_cunpackm_ref_10xk
|
||||
#define BLIS_ZUNPACKM_10XK_KERNEL_REF bli_zunpackm_ref_10xk
|
||||
|
||||
// unpackm_12xk kernels
|
||||
|
||||
#define BLIS_SUNPACKM_12XK_KERNEL_REF bli_sunpackm_ref_12xk
|
||||
#define BLIS_DUNPACKM_12XK_KERNEL_REF bli_dunpackm_ref_12xk
|
||||
#define BLIS_CUNPACKM_12XK_KERNEL_REF bli_cunpackm_ref_12xk
|
||||
#define BLIS_ZUNPACKM_12XK_KERNEL_REF bli_zunpackm_ref_12xk
|
||||
|
||||
// unpackm_14xk kernels
|
||||
|
||||
#define BLIS_SUNPACKM_14XK_KERNEL_REF bli_sunpackm_ref_14xk
|
||||
#define BLIS_DUNPACKM_14XK_KERNEL_REF bli_dunpackm_ref_14xk
|
||||
#define BLIS_CUNPACKM_14XK_KERNEL_REF bli_cunpackm_ref_14xk
|
||||
#define BLIS_ZUNPACKM_14XK_KERNEL_REF bli_zunpackm_ref_14xk
|
||||
|
||||
// unpackm_16xk kernels
|
||||
|
||||
#define BLIS_SUNPACKM_16XK_KERNEL_REF bli_sunpackm_ref_16xk
|
||||
#define BLIS_DUNPACKM_16XK_KERNEL_REF bli_dunpackm_ref_16xk
|
||||
#define BLIS_CUNPACKM_16XK_KERNEL_REF bli_cunpackm_ref_16xk
|
||||
#define BLIS_ZUNPACKM_16XK_KERNEL_REF bli_zunpackm_ref_16xk
|
||||
|
||||
//
|
||||
// Level-1f
|
||||
//
|
||||
|
||||
// axpy2v kernels
|
||||
|
||||
#define BLIS_SAXPY2V_KERNEL_REF bli_sssaxpy2v_ref
|
||||
#define BLIS_DAXPY2V_KERNEL_REF bli_dddaxpy2v_ref
|
||||
#define BLIS_CAXPY2V_KERNEL_REF bli_cccaxpy2v_ref
|
||||
#define BLIS_ZAXPY2V_KERNEL_REF bli_zzzaxpy2v_ref
|
||||
|
||||
// dotaxpyv kernels
|
||||
|
||||
#define BLIS_SDOTAXPYV_KERNEL_REF bli_sssdotaxpyv_ref
|
||||
#define BLIS_DDOTAXPYV_KERNEL_REF bli_ddddotaxpyv_ref
|
||||
#define BLIS_CDOTAXPYV_KERNEL_REF bli_cccdotaxpyv_ref
|
||||
#define BLIS_ZDOTAXPYV_KERNEL_REF bli_zzzdotaxpyv_ref
|
||||
|
||||
// axpyf kernels
|
||||
|
||||
#define BLIS_SAXPYF_KERNEL_REF bli_sssaxpyf_ref
|
||||
#define BLIS_DAXPYF_KERNEL_REF bli_dddaxpyf_ref
|
||||
#define BLIS_CAXPYF_KERNEL_REF bli_cccaxpyf_ref
|
||||
#define BLIS_ZAXPYF_KERNEL_REF bli_zzzaxpyf_ref
|
||||
|
||||
// dotxf kernels
|
||||
|
||||
#define BLIS_SDOTXF_KERNEL_REF bli_sssdotxf_ref
|
||||
#define BLIS_DDOTXF_KERNEL_REF bli_ddddotxf_ref
|
||||
#define BLIS_CDOTXF_KERNEL_REF bli_cccdotxf_ref
|
||||
#define BLIS_ZDOTXF_KERNEL_REF bli_zzzdotxf_ref
|
||||
|
||||
// dotxaxpyf kernels
|
||||
|
||||
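// NOTE: The var1 reference variants are disabled (commented out); the var2
// definitions that follow them are the ones in effect.
//#define BLIS_SDOTXAXPYF_KERNEL_REF bli_sssdotxaxpyf_ref_var1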
|
||||
//#define BLIS_DDOTXAXPYF_KERNEL_REF bli_ddddotxaxpyf_ref_var1
|
||||
//#define BLIS_CDOTXAXPYF_KERNEL_REF bli_cccdotxaxpyf_ref_var1
|
||||
//#define BLIS_ZDOTXAXPYF_KERNEL_REF bli_zzzdotxaxpyf_ref_var1
|
||||
#define BLIS_SDOTXAXPYF_KERNEL_REF bli_sssdotxaxpyf_ref_var2
|
||||
#define BLIS_DDOTXAXPYF_KERNEL_REF bli_ddddotxaxpyf_ref_var2
|
||||
#define BLIS_CDOTXAXPYF_KERNEL_REF bli_cccdotxaxpyf_ref_var2
|
||||
#define BLIS_ZDOTXAXPYF_KERNEL_REF bli_zzzdotxaxpyf_ref_var2
|
||||
|
||||
//
|
||||
// Level-1v
|
||||
//
|
||||
|
||||
// addv kernels
|
||||
|
||||
#define BLIS_SADDV_KERNEL_REF bli_ssaddv_ref
|
||||
#define BLIS_DADDV_KERNEL_REF bli_ddaddv_ref
|
||||
#define BLIS_CADDV_KERNEL_REF bli_ccaddv_ref
|
||||
#define BLIS_ZADDV_KERNEL_REF bli_zzaddv_ref
|
||||
|
||||
// axpyv kernels
|
||||
|
||||
#define BLIS_SAXPYV_KERNEL_REF bli_sssaxpyv_ref
|
||||
#define BLIS_DAXPYV_KERNEL_REF bli_dddaxpyv_ref
|
||||
#define BLIS_CAXPYV_KERNEL_REF bli_cccaxpyv_ref
|
||||
#define BLIS_ZAXPYV_KERNEL_REF bli_zzzaxpyv_ref
|
||||
|
||||
// copyv kernels
|
||||
|
||||
#define BLIS_SCOPYV_KERNEL_REF bli_sscopyv_ref
|
||||
#define BLIS_DCOPYV_KERNEL_REF bli_ddcopyv_ref
|
||||
#define BLIS_CCOPYV_KERNEL_REF bli_cccopyv_ref
|
||||
#define BLIS_ZCOPYV_KERNEL_REF bli_zzcopyv_ref
|
||||
|
||||
// dotv kernels
|
||||
|
||||
#define BLIS_SDOTV_KERNEL_REF bli_sssdotv_ref
|
||||
#define BLIS_DDOTV_KERNEL_REF bli_ddddotv_ref
|
||||
#define BLIS_CDOTV_KERNEL_REF bli_cccdotv_ref
|
||||
#define BLIS_ZDOTV_KERNEL_REF bli_zzzdotv_ref
|
||||
|
||||
// dotxv kernels
|
||||
|
||||
#define BLIS_SDOTXV_KERNEL_REF bli_sssdotxv_ref
|
||||
#define BLIS_DDOTXV_KERNEL_REF bli_ddddotxv_ref
|
||||
#define BLIS_CDOTXV_KERNEL_REF bli_cccdotxv_ref
|
||||
#define BLIS_ZDOTXV_KERNEL_REF bli_zzzdotxv_ref
|
||||
|
||||
// invertv kernels
|
||||
|
||||
#define BLIS_SINVERTV_KERNEL_REF bli_sinvertv_ref
|
||||
#define BLIS_DINVERTV_KERNEL_REF bli_dinvertv_ref
|
||||
#define BLIS_CINVERTV_KERNEL_REF bli_cinvertv_ref
|
||||
#define BLIS_ZINVERTV_KERNEL_REF bli_zinvertv_ref
|
||||
|
||||
// scal2v kernels
|
||||
|
||||
#define BLIS_SSCAL2V_KERNEL_REF bli_sssscal2v_ref
|
||||
#define BLIS_DSCAL2V_KERNEL_REF bli_dddscal2v_ref
|
||||
#define BLIS_CSCAL2V_KERNEL_REF bli_cccscal2v_ref
|
||||
#define BLIS_ZSCAL2V_KERNEL_REF bli_zzzscal2v_ref
|
||||
|
||||
// scalv kernels
|
||||
|
||||
#define BLIS_SSCALV_KERNEL_REF bli_ssscalv_ref
|
||||
#define BLIS_DSCALV_KERNEL_REF bli_ddscalv_ref
|
||||
#define BLIS_CSCALV_KERNEL_REF bli_ccscalv_ref
|
||||
#define BLIS_ZSCALV_KERNEL_REF bli_zzscalv_ref
|
||||
|
||||
// setv kernels
|
||||
|
||||
#define BLIS_SSETV_KERNEL_REF bli_sssetv_ref
|
||||
#define BLIS_DSETV_KERNEL_REF bli_ddsetv_ref
|
||||
#define BLIS_CSETV_KERNEL_REF bli_ccsetv_ref
|
||||
#define BLIS_ZSETV_KERNEL_REF bli_zzsetv_ref
|
||||
|
||||
// subv kernels
|
||||
|
||||
#define BLIS_SSUBV_KERNEL_REF bli_sssubv_ref
|
||||
#define BLIS_DSUBV_KERNEL_REF bli_ddsubv_ref
|
||||
#define BLIS_CSUBV_KERNEL_REF bli_ccsubv_ref
|
||||
#define BLIS_ZSUBV_KERNEL_REF bli_zzsubv_ref
|
||||
|
||||
// swapv kernels
|
||||
|
||||
#define BLIS_SSWAPV_KERNEL_REF bli_ssswapv_ref
|
||||
#define BLIS_DSWAPV_KERNEL_REF bli_ddswapv_ref
|
||||
#define BLIS_CSWAPV_KERNEL_REF bli_ccswapv_ref
|
||||
#define BLIS_ZSWAPV_KERNEL_REF bli_zzswapv_ref
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
741
frame/include/bli_kernel_prototypes.h
Normal file
741
frame/include/bli_kernel_prototypes.h
Normal file
@@ -0,0 +1,741 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_KERNEL_PROTOTYPES_H
|
||||
#define BLIS_KERNEL_PROTOTYPES_H
|
||||
|
||||
|
||||
// -- Define PASTEMAC-friendly kernel function name macros ---------------------
|
||||
|
||||
//
|
||||
// Level-3
|
||||
//
|
||||
|
||||
// gemm micro-kernels
// Each alias forwards a bli_<ch>GEMM_UKERNEL name (ch = s, d, c, z) to the
// matching BLIS_<CH>GEMM_UKERNEL macro.
// NOTE(review): presumably PASTEMAC(ch,kername) pastes its arguments into the
// bli_<ch>GEMM_UKERNEL spelling so the prototype below resolves through these
// aliases; PASTEMAC is defined elsewhere -- confirm.
|
||||
|
||||
#define bli_sGEMM_UKERNEL BLIS_SGEMM_UKERNEL
|
||||
#define bli_dGEMM_UKERNEL BLIS_DGEMM_UKERNEL
|
||||
#define bli_cGEMM_UKERNEL BLIS_CGEMM_UKERNEL
|
||||
#define bli_zGEMM_UKERNEL BLIS_ZGEMM_UKERNEL
|
||||
|
||||
// Prototype template for one datatype: a void function taking dim_t k,
// restrict-qualified ctype pointers alpha, a, b, beta and c (c is followed by
// inc_t rs_c and cs_c), and a trailing auxinfo_t* data.
// GENTPROT is #undef'd first because this header redefines it for every
// kernel family.
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
dim_t k, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
// NOTE(review): INSERT_GENTPROT_BASIC is defined elsewhere; presumably it
// expands GENTPROT once per datatype -- confirm.
INSERT_GENTPROT_BASIC( GEMM_UKERNEL )
|
||||
|
||||
// gemmtrsm_l micro-kernels
// Each alias forwards a bli_<ch>GEMMTRSM_L_UKERNEL name (ch = s, d, c, z) to
// the matching BLIS_<CH>GEMMTRSM_L_UKERNEL macro.
|
||||
|
||||
#define bli_sGEMMTRSM_L_UKERNEL BLIS_SGEMMTRSM_L_UKERNEL
|
||||
#define bli_dGEMMTRSM_L_UKERNEL BLIS_DGEMMTRSM_L_UKERNEL
|
||||
#define bli_cGEMMTRSM_L_UKERNEL BLIS_CGEMMTRSM_L_UKERNEL
|
||||
#define bli_zGEMMTRSM_L_UKERNEL BLIS_ZGEMMTRSM_L_UKERNEL
|
||||
|
||||
// Prototype template: a void function taking dim_t k, restrict-qualified
// ctype pointers alpha, a10, a11, b01, b11 and c11 (c11 is followed by inc_t
// rs_c and cs_c), and a trailing auxinfo_t* data. There is no beta argument.
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
dim_t k, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a10, \
|
||||
ctype* restrict a11, \
|
||||
ctype* restrict b01, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( GEMMTRSM_L_UKERNEL )
|
||||
|
||||
// gemmtrsm_u micro-kernels
// Each alias forwards a bli_<ch>GEMMTRSM_U_UKERNEL name (ch = s, d, c, z) to
// the matching BLIS_<CH>GEMMTRSM_U_UKERNEL macro.
|
||||
|
||||
#define bli_sGEMMTRSM_U_UKERNEL BLIS_SGEMMTRSM_U_UKERNEL
|
||||
#define bli_dGEMMTRSM_U_UKERNEL BLIS_DGEMMTRSM_U_UKERNEL
|
||||
#define bli_cGEMMTRSM_U_UKERNEL BLIS_CGEMMTRSM_U_UKERNEL
|
||||
#define bli_zGEMMTRSM_U_UKERNEL BLIS_ZGEMMTRSM_U_UKERNEL
|
||||
|
||||
// Prototype template: same argument types and order as the gemmtrsm_l
// template in this header, but the sub-operand pointers are named a12 and b21
// instead of a10 and b01.
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
dim_t k, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a12, \
|
||||
ctype* restrict a11, \
|
||||
ctype* restrict b21, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( GEMMTRSM_U_UKERNEL )
|
||||
|
||||
// trsm_l micro-kernels
// Each alias forwards a bli_<ch>TRSM_L_UKERNEL name (ch = s, d, c, z) to the
// matching BLIS_<CH>TRSM_L_UKERNEL macro.
|
||||
|
||||
#define bli_sTRSM_L_UKERNEL BLIS_STRSM_L_UKERNEL
|
||||
#define bli_dTRSM_L_UKERNEL BLIS_DTRSM_L_UKERNEL
|
||||
#define bli_cTRSM_L_UKERNEL BLIS_CTRSM_L_UKERNEL
|
||||
#define bli_zTRSM_L_UKERNEL BLIS_ZTRSM_L_UKERNEL
|
||||
|
||||
// Prototype template: a void function taking restrict-qualified ctype
// pointers a11, b11 and c11 (c11 is followed by inc_t rs_c and cs_c) and a
// trailing auxinfo_t* data. Unlike the gemm and gemmtrsm templates in this
// header, it takes no k or alpha argument.
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
ctype* restrict a11, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( TRSM_L_UKERNEL )
|
||||
|
||||
// trsm_u micro-kernels
// Each alias forwards a bli_<ch>TRSM_U_UKERNEL name (ch = s, d, c, z) to the
// matching BLIS_<CH>TRSM_U_UKERNEL macro.
|
||||
|
||||
#define bli_sTRSM_U_UKERNEL BLIS_STRSM_U_UKERNEL
|
||||
#define bli_dTRSM_U_UKERNEL BLIS_DTRSM_U_UKERNEL
|
||||
#define bli_cTRSM_U_UKERNEL BLIS_CTRSM_U_UKERNEL
|
||||
#define bli_zTRSM_U_UKERNEL BLIS_ZTRSM_U_UKERNEL
|
||||
|
||||
// Prototype template: the parameter list is identical to the trsm_l template
// in this header (a11, b11, c11 with rs_c/cs_c, auxinfo_t* data); only the
// kernel name passed to the INSERT macro differs.
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
ctype* restrict a11, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( TRSM_U_UKERNEL )
|
||||
|
||||
|
||||
//
|
||||
// Level-3 4m
|
||||
//
|
||||
|
||||
// gemm4m micro-kernels
|
||||
|
||||
#define bli_cGEMM4M_UKERNEL BLIS_CGEMM4M_UKERNEL
|
||||
#define bli_zGEMM4M_UKERNEL BLIS_ZGEMM4M_UKERNEL
|
||||
|
||||
#undef GENTPROTCO
|
||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
dim_t k, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( GEMM4M_UKERNEL )
|
||||
|
||||
// gemmtrsm4m_l micro-kernels
|
||||
|
||||
#define bli_cGEMMTRSM4M_L_UKERNEL BLIS_CGEMMTRSM4M_L_UKERNEL
|
||||
#define bli_zGEMMTRSM4M_L_UKERNEL BLIS_ZGEMMTRSM4M_L_UKERNEL
|
||||
|
||||
#undef GENTPROTCO
|
||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
dim_t k, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a10, \
|
||||
ctype* restrict a11, \
|
||||
ctype* restrict b01, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( GEMMTRSM4M_L_UKERNEL )
|
||||
|
||||
// gemmtrsm4m_u micro-kernels
|
||||
|
||||
#define bli_cGEMMTRSM4M_U_UKERNEL BLIS_CGEMMTRSM4M_U_UKERNEL
|
||||
#define bli_zGEMMTRSM4M_U_UKERNEL BLIS_ZGEMMTRSM4M_U_UKERNEL
|
||||
|
||||
#undef GENTPROTCO
|
||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
dim_t k, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a12, \
|
||||
ctype* restrict a11, \
|
||||
ctype* restrict b21, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( GEMMTRSM4M_U_UKERNEL )
|
||||
|
||||
// trsm4m_l micro-kernels
// Only the complex prefixes (c, z) get aliases here; each forwards a
// bli_<ch>TRSM4M_L_UKERNEL name to the matching BLIS_<CH>TRSM4M_L_UKERNEL
// macro.
|
||||
|
||||
#define bli_cTRSM4M_L_UKERNEL BLIS_CTRSM4M_L_UKERNEL
|
||||
#define bli_zTRSM4M_L_UKERNEL BLIS_ZTRSM4M_L_UKERNEL
|
||||
|
||||
// Prototype template: a11r and b11r are pointers to ctype_r, the second type
// argument of GENTPROTCO, while c11 stays a ctype pointer. The chr parameter
// is accepted but not used in this template.
// NOTE(review): ctype_r is presumably the real type paired with the complex
// ctype (e.g. float for scomplex) -- confirm against INSERT_GENTPROTCO_BASIC.
#undef GENTPROTCO
|
||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
ctype_r* restrict a11r, \
|
||||
ctype_r* restrict b11r, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( TRSM4M_L_UKERNEL )
|
||||
|
||||
// trsm4m_u micro-kernels
|
||||
|
||||
#define bli_cTRSM4M_U_UKERNEL BLIS_CTRSM4M_U_UKERNEL
|
||||
#define bli_zTRSM4M_U_UKERNEL BLIS_ZTRSM4M_U_UKERNEL
|
||||
|
||||
#undef GENTPROTCO
|
||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
ctype_r* restrict a11r, \
|
||||
ctype_r* restrict b11r, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( TRSM4M_U_UKERNEL )
|
||||
|
||||
|
||||
//
|
||||
// Level-3 3m
|
||||
//
|
||||
|
||||
// gemm3m micro-kernels
|
||||
|
||||
#define bli_cGEMM3M_UKERNEL BLIS_CGEMM3M_UKERNEL
|
||||
#define bli_zGEMM3M_UKERNEL BLIS_ZGEMM3M_UKERNEL
|
||||
|
||||
#undef GENTPROTCO
|
||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
dim_t k, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( GEMM3M_UKERNEL )
|
||||
|
||||
// gemmtrsm3m_l micro-kernels
|
||||
|
||||
#define bli_cGEMMTRSM3M_L_UKERNEL BLIS_CGEMMTRSM3M_L_UKERNEL
|
||||
#define bli_zGEMMTRSM3M_L_UKERNEL BLIS_ZGEMMTRSM3M_L_UKERNEL
|
||||
|
||||
#undef GENTPROTCO
|
||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
dim_t k, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a10, \
|
||||
ctype* restrict a11, \
|
||||
ctype* restrict b01, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( GEMMTRSM3M_L_UKERNEL )
|
||||
|
||||
// gemmtrsm3m_u micro-kernels
|
||||
|
||||
#define bli_cGEMMTRSM3M_U_UKERNEL BLIS_CGEMMTRSM3M_U_UKERNEL
|
||||
#define bli_zGEMMTRSM3M_U_UKERNEL BLIS_ZGEMMTRSM3M_U_UKERNEL
|
||||
|
||||
#undef GENTPROTCO
|
||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
dim_t k, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a12, \
|
||||
ctype* restrict a11, \
|
||||
ctype* restrict b21, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( GEMMTRSM3M_U_UKERNEL )
|
||||
|
||||
// trsm3m_l micro-kernels
|
||||
|
||||
#define bli_cTRSM3M_L_UKERNEL BLIS_CTRSM3M_L_UKERNEL
|
||||
#define bli_zTRSM3M_L_UKERNEL BLIS_ZTRSM3M_L_UKERNEL
|
||||
|
||||
#undef GENTPROTCO
|
||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
ctype_r* restrict a11r, \
|
||||
ctype_r* restrict b11r, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( TRSM3M_L_UKERNEL )
|
||||
|
||||
// trsm3m_u micro-kernels
|
||||
|
||||
#define bli_cTRSM3M_U_UKERNEL BLIS_CTRSM3M_U_UKERNEL
|
||||
#define bli_zTRSM3M_U_UKERNEL BLIS_ZTRSM3M_U_UKERNEL
|
||||
|
||||
#undef GENTPROTCO
|
||||
#define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
ctype_r* restrict a11r, \
|
||||
ctype_r* restrict b11r, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTCO_BASIC( TRSM3M_U_UKERNEL )
|
||||
|
||||
|
||||
//
|
||||
// Level-1m
|
||||
//
|
||||
|
||||
// NOTE: We don't need any PASTEMAC-friendly aliases to packm kernel
|
||||
// macros because they are used directly in the initialization of the
|
||||
// function pointer array, rather than via a templatizing wrapper macro.
|
||||
|
||||
|
||||
//
|
||||
// Level-1f
|
||||
//
|
||||
|
||||
// axpy2v kernels
|
||||
|
||||
#define bli_sssAXPY2V_KERNEL BLIS_SAXPY2V_KERNEL
|
||||
#define bli_dddAXPY2V_KERNEL BLIS_DAXPY2V_KERNEL
|
||||
#define bli_cccAXPY2V_KERNEL BLIS_CAXPY2V_KERNEL
|
||||
#define bli_zzzAXPY2V_KERNEL BLIS_ZAXPY2V_KERNEL
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_x, ctype_y, ctype_z, ctype_xy, chx, chy, chz, chxy, kername ) \
|
||||
\
|
||||
void PASTEMAC3(chx,chy,chz,kername) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype_xy* restrict alpha1, \
|
||||
ctype_xy* restrict alpha2, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy, \
|
||||
ctype_z* restrict z, inc_t incz \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( AXPY2V_KERNEL )
|
||||
|
||||
// dotaxpyv kernels
|
||||
|
||||
#define bli_sssDOTAXPYV_KERNEL BLIS_SDOTAXPYV_KERNEL
|
||||
#define bli_dddDOTAXPYV_KERNEL BLIS_DDOTAXPYV_KERNEL
|
||||
#define bli_cccDOTAXPYV_KERNEL BLIS_CDOTAXPYV_KERNEL
|
||||
#define bli_zzzDOTAXPYV_KERNEL BLIS_ZDOTAXPYV_KERNEL
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_x, ctype_y, ctype_z, ctype_xy, chx, chy, chz, chxy, kername ) \
|
||||
\
|
||||
void PASTEMAC3(chx,chy,chz,kername) \
|
||||
( \
|
||||
conj_t conjxt, \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t m, \
|
||||
ctype_x* restrict alpha, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy, \
|
||||
ctype_xy* restrict rho, \
|
||||
ctype_z* restrict z, inc_t incz \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( DOTAXPYV_KERNEL )
|
||||
|
||||
// axpyf kernels
|
||||
|
||||
#define bli_sssAXPYF_KERNEL BLIS_SAXPYF_KERNEL
|
||||
#define bli_dddAXPYF_KERNEL BLIS_DAXPYF_KERNEL
|
||||
#define bli_cccAXPYF_KERNEL BLIS_CAXPYF_KERNEL
|
||||
#define bli_zzzAXPYF_KERNEL BLIS_ZAXPYF_KERNEL
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, kername ) \
|
||||
\
|
||||
void PASTEMAC3(cha,chx,chy,kername) \
|
||||
( \
|
||||
conj_t conja, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype_ax* restrict alpha, \
|
||||
ctype_a* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( AXPYF_KERNEL )
|
||||
|
||||
// dotxf kernels
|
||||
|
||||
#define bli_sssDOTXF_KERNEL BLIS_SDOTXF_KERNEL
|
||||
#define bli_dddDOTXF_KERNEL BLIS_DDOTXF_KERNEL
|
||||
#define bli_cccDOTXF_KERNEL BLIS_CDOTXF_KERNEL
|
||||
#define bli_zzzDOTXF_KERNEL BLIS_ZDOTXF_KERNEL
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, kername ) \
|
||||
\
|
||||
void PASTEMAC3(cha,chx,chy,kername) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype_ax* restrict alpha, \
|
||||
ctype_a* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict beta, \
|
||||
ctype_y* restrict y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( DOTXF_KERNEL )
|
||||
|
||||
// dotxaxpyf kernels
|
||||
|
||||
#define bli_sssDOTXAXPYF_KERNEL BLIS_SDOTXAXPYF_KERNEL
|
||||
#define bli_dddDOTXAXPYF_KERNEL BLIS_DDOTXAXPYF_KERNEL
|
||||
#define bli_cccDOTXAXPYF_KERNEL BLIS_CDOTXAXPYF_KERNEL
|
||||
#define bli_zzzDOTXAXPYF_KERNEL BLIS_ZDOTXAXPYF_KERNEL
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, kername ) \
|
||||
\
|
||||
void PASTEMAC3(cha,chb,chc,kername) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype_ab* restrict alpha, \
|
||||
ctype_a* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype_b* restrict w, inc_t incw, \
|
||||
ctype_b* restrict x, inc_t incx, \
|
||||
ctype_c* restrict beta, \
|
||||
ctype_c* restrict y, inc_t incy, \
|
||||
ctype_c* restrict z, inc_t incz \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( DOTXAXPYF_KERNEL )
|
||||
|
||||
|
||||
//
|
||||
// Level-1v
|
||||
//
|
||||
|
||||
// addv kernels
|
||||
|
||||
#define bli_ssADDV_KERNEL BLIS_SADDV_KERNEL
|
||||
#define bli_ddADDV_KERNEL BLIS_DADDV_KERNEL
|
||||
#define bli_ccADDV_KERNEL BLIS_CADDV_KERNEL
|
||||
#define bli_zzADDV_KERNEL BLIS_ZADDV_KERNEL
|
||||
|
||||
#undef GENTPROT2
|
||||
#define GENTPROT2( ctype_x, ctype_y, chx, chy, kername ) \
|
||||
\
|
||||
void PASTEMAC2(chx,chy,kername) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT2_BASIC( ADDV_KERNEL )
|
||||
|
||||
// axpyv kernels
|
||||
|
||||
#define bli_sssAXPYV_KERNEL BLIS_SAXPYV_KERNEL
|
||||
#define bli_dddAXPYV_KERNEL BLIS_DAXPYV_KERNEL
|
||||
#define bli_cccAXPYV_KERNEL BLIS_CAXPYV_KERNEL
|
||||
#define bli_zzzAXPYV_KERNEL BLIS_ZAXPYV_KERNEL
|
||||
|
||||
#undef GENTPROT3
|
||||
#define GENTPROT3( ctype_a, ctype_x, ctype_y, cha, chx, chy, kername ) \
|
||||
\
|
||||
void PASTEMAC3(cha,chx,chy,kername) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype_a* restrict alpha, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3_BASIC( AXPYV_KERNEL )
|
||||
|
||||
// copyv kernels
|
||||
|
||||
#define bli_ssCOPYV_KERNEL BLIS_SCOPYV_KERNEL
|
||||
#define bli_ddCOPYV_KERNEL BLIS_DCOPYV_KERNEL
|
||||
#define bli_ccCOPYV_KERNEL BLIS_CCOPYV_KERNEL
|
||||
#define bli_zzCOPYV_KERNEL BLIS_ZCOPYV_KERNEL
|
||||
|
||||
#undef GENTPROT2
|
||||
#define GENTPROT2( ctype_x, ctype_y, chx, chy, kername ) \
|
||||
\
|
||||
void PASTEMAC2(chx,chy,kername) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT2_BASIC( COPYV_KERNEL )
|
||||
|
||||
// dotv kernels
|
||||
|
||||
#define bli_sssDOTV_KERNEL BLIS_SDOTV_KERNEL
|
||||
#define bli_dddDOTV_KERNEL BLIS_DDOTV_KERNEL
|
||||
#define bli_cccDOTV_KERNEL BLIS_CDOTV_KERNEL
|
||||
#define bli_zzzDOTV_KERNEL BLIS_ZDOTV_KERNEL
|
||||
|
||||
#undef GENTPROT3
|
||||
#define GENTPROT3( ctype_x, ctype_y, ctype_r, chx, chy, chr, kername ) \
|
||||
\
|
||||
void PASTEMAC3(chx,chy,chr,kername) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy, \
|
||||
ctype_r* restrict rho \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3_BASIC( DOTV_KERNEL )
|
||||
|
||||
// dotxv kernels
|
||||
|
||||
#define bli_sssDOTXV_KERNEL BLIS_SDOTXV_KERNEL
|
||||
#define bli_dddDOTXV_KERNEL BLIS_DDOTXV_KERNEL
|
||||
#define bli_cccDOTXV_KERNEL BLIS_CDOTXV_KERNEL
|
||||
#define bli_zzzDOTXV_KERNEL BLIS_ZDOTXV_KERNEL
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_x, ctype_y, ctype_r, ctype_xy, chx, chy, chr, chxy, kername ) \
|
||||
\
|
||||
void PASTEMAC3(chx,chy,chr,kername) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype_xy* restrict alpha, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy, \
|
||||
ctype_r* restrict beta, \
|
||||
ctype_r* restrict rho \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( DOTXV_KERNEL )
|
||||
|
||||
// invertv kernels
|
||||
|
||||
#define bli_sINVERTV_KERNEL BLIS_SINVERTV_KERNEL
|
||||
#define bli_dINVERTV_KERNEL BLIS_DINVERTV_KERNEL
|
||||
#define bli_cINVERTV_KERNEL BLIS_CINVERTV_KERNEL
|
||||
#define bli_zINVERTV_KERNEL BLIS_ZINVERTV_KERNEL
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, kername ) \
|
||||
\
|
||||
void PASTEMAC(ch,kername) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( INVERTV_KERNEL )
|
||||
|
||||
// scal2v kernels
|
||||
|
||||
#define bli_sssSCAL2V_KERNEL BLIS_SSCAL2V_KERNEL
|
||||
#define bli_dddSCAL2V_KERNEL BLIS_DSCAL2V_KERNEL
|
||||
#define bli_cccSCAL2V_KERNEL BLIS_CSCAL2V_KERNEL
|
||||
#define bli_zzzSCAL2V_KERNEL BLIS_ZSCAL2V_KERNEL
|
||||
|
||||
#undef GENTPROT3
|
||||
#define GENTPROT3( ctype_b, ctype_x, ctype_y, chb, chx, chy, kername ) \
|
||||
\
|
||||
void PASTEMAC3(chb,chx,chy,kername) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype_b* restrict beta, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3_BASIC( SCAL2V_KERNEL )
|
||||
|
||||
// scalv kernels
|
||||
|
||||
#define bli_ssSCALV_KERNEL BLIS_SSCALV_KERNEL
|
||||
#define bli_ddSCALV_KERNEL BLIS_DSCALV_KERNEL
|
||||
#define bli_ccSCALV_KERNEL BLIS_CSCALV_KERNEL
|
||||
#define bli_zzSCALV_KERNEL BLIS_ZSCALV_KERNEL
|
||||
|
||||
#undef GENTPROT2
|
||||
#define GENTPROT2( ctype_b, ctype_x, chb, chx, kername ) \
|
||||
\
|
||||
void PASTEMAC2(chb,chx,kername) \
|
||||
( \
|
||||
conj_t conjbeta, \
|
||||
dim_t n, \
|
||||
ctype_b* restrict beta, \
|
||||
ctype_x* restrict x, inc_t incx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT2_BASIC( SCALV_KERNEL )
|
||||
|
||||
// setv kernels
|
||||
|
||||
#define bli_ssSETV_KERNEL BLIS_SSETV_KERNEL
|
||||
#define bli_ddSETV_KERNEL BLIS_DSETV_KERNEL
|
||||
#define bli_ccSETV_KERNEL BLIS_CSETV_KERNEL
|
||||
#define bli_zzSETV_KERNEL BLIS_ZSETV_KERNEL
|
||||
|
||||
#undef GENTPROT2
|
||||
#define GENTPROT2( ctype_b, ctype_x, chb, chx, kername ) \
|
||||
\
|
||||
void PASTEMAC2(chb,chx,kername) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype_b* restrict beta, \
|
||||
ctype_x* restrict x, inc_t incx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT2_BASIC( SETV_KERNEL )
|
||||
|
||||
// subv kernels
|
||||
|
||||
#define bli_ssSUBV_KERNEL BLIS_SSUBV_KERNEL
|
||||
#define bli_ddSUBV_KERNEL BLIS_DSUBV_KERNEL
|
||||
#define bli_ccSUBV_KERNEL BLIS_CSUBV_KERNEL
|
||||
#define bli_zzSUBV_KERNEL BLIS_ZSUBV_KERNEL
|
||||
|
||||
#undef GENTPROT2
|
||||
#define GENTPROT2( ctype_x, ctype_y, chx, chy, kername ) \
|
||||
\
|
||||
void PASTEMAC2(chx,chy,kername) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT2_BASIC( SUBV_KERNEL )
|
||||
|
||||
// swapv kernels
|
||||
|
||||
#define bli_ssSWAPV_KERNEL BLIS_SSWAPV_KERNEL
|
||||
#define bli_ddSWAPV_KERNEL BLIS_DSWAPV_KERNEL
|
||||
#define bli_ccSWAPV_KERNEL BLIS_CSWAPV_KERNEL
|
||||
#define bli_zzSWAPV_KERNEL BLIS_ZSWAPV_KERNEL
|
||||
|
||||
#undef GENTPROT2
|
||||
#define GENTPROT2( ctype_x, ctype_y, chx, chy, kername ) \
|
||||
\
|
||||
void PASTEMAC2(chx,chy,kername) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT2_BASIC( SWAPV_KERNEL )
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -114,6 +114,7 @@
|
||||
#define BLIS_DEFAULT_MAXR_Z BLIS_DEFAULT_NR_Z
|
||||
#endif
|
||||
|
||||
|
||||
// Next, we define the dimensions of the pool blocks for each datatype.
|
||||
|
||||
//
|
||||
@@ -156,6 +157,47 @@
|
||||
#define BLIS_POOL_KC_Z ( ( BLIS_MAXIMUM_KC_Z * BLIS_PACKDIM_KR_Z ) \
|
||||
/ BLIS_DEFAULT_KR_Z )
|
||||
|
||||
//
|
||||
// Compute pool dimensions for single complex (4m)
// Each dimension is BLIS_MAXIMUM_4M_<dim>_C multiplied by a PACKDIM value and
// then divided by the corresponding DEFAULT value. The PACKDIM/DEFAULT pair
// comes from the single-precision real (_S) macros, not the _C ones.
// NOTE(review): presumably because 4m runs on real-domain micro-kernels --
// confirm.
|
||||
//
|
||||
#define BLIS_POOL_4M_MC_C ( ( BLIS_MAXIMUM_4M_MC_C * BLIS_PACKDIM_MAXR_S ) \
|
||||
/ BLIS_DEFAULT_MAXR_S )
|
||||
#define BLIS_POOL_4M_NC_C ( ( BLIS_MAXIMUM_4M_NC_C * BLIS_PACKDIM_MAXR_S ) \
|
||||
/ BLIS_DEFAULT_MAXR_S )
|
||||
#define BLIS_POOL_4M_KC_C ( ( BLIS_MAXIMUM_4M_KC_C * BLIS_PACKDIM_KR_S ) \
|
||||
/ BLIS_DEFAULT_KR_S )
|
||||
|
||||
//
|
||||
// Compute pool dimensions for double complex (4m)
// Each dimension is BLIS_MAXIMUM_4M_<dim>_Z multiplied by a PACKDIM value and
// then divided by the corresponding DEFAULT value. The PACKDIM/DEFAULT pair
// comes from the double-precision real (_D) macros, not the _Z ones.
|
||||
//
|
||||
#define BLIS_POOL_4M_MC_Z ( ( BLIS_MAXIMUM_4M_MC_Z * BLIS_PACKDIM_MAXR_D ) \
|
||||
/ BLIS_DEFAULT_MAXR_D )
|
||||
#define BLIS_POOL_4M_NC_Z ( ( BLIS_MAXIMUM_4M_NC_Z * BLIS_PACKDIM_MAXR_D ) \
|
||||
/ BLIS_DEFAULT_MAXR_D )
|
||||
#define BLIS_POOL_4M_KC_Z ( ( BLIS_MAXIMUM_4M_KC_Z * BLIS_PACKDIM_KR_D ) \
|
||||
/ BLIS_DEFAULT_KR_D )
|
||||
|
||||
//
|
||||
// Compute pool dimensions for single complex (3m)
// Each dimension is BLIS_MAXIMUM_3M_<dim>_C multiplied by a PACKDIM value and
// then divided by the corresponding DEFAULT value. The PACKDIM/DEFAULT pair
// comes from the single-precision real (_S) macros, not the _C ones.
|
||||
//
|
||||
#define BLIS_POOL_3M_MC_C ( ( BLIS_MAXIMUM_3M_MC_C * BLIS_PACKDIM_MAXR_S ) \
|
||||
/ BLIS_DEFAULT_MAXR_S )
|
||||
#define BLIS_POOL_3M_NC_C ( ( BLIS_MAXIMUM_3M_NC_C * BLIS_PACKDIM_MAXR_S ) \
|
||||
/ BLIS_DEFAULT_MAXR_S )
|
||||
#define BLIS_POOL_3M_KC_C ( ( BLIS_MAXIMUM_3M_KC_C * BLIS_PACKDIM_KR_S ) \
|
||||
/ BLIS_DEFAULT_KR_S )
|
||||
|
||||
//
|
||||
// Compute pool dimensions for double complex (3m)
// Each dimension is BLIS_MAXIMUM_3M_<dim>_Z multiplied by a PACKDIM value and
// then divided by the corresponding DEFAULT value. The PACKDIM/DEFAULT pair
// comes from the double-precision real (_D) macros, not the _Z ones.
|
||||
//
|
||||
#define BLIS_POOL_3M_MC_Z ( ( BLIS_MAXIMUM_3M_MC_Z * BLIS_PACKDIM_MAXR_D ) \
|
||||
/ BLIS_DEFAULT_MAXR_D )
|
||||
#define BLIS_POOL_3M_NC_Z ( ( BLIS_MAXIMUM_3M_NC_Z * BLIS_PACKDIM_MAXR_D ) \
|
||||
/ BLIS_DEFAULT_MAXR_D )
|
||||
#define BLIS_POOL_3M_KC_Z ( ( BLIS_MAXIMUM_3M_KC_Z * BLIS_PACKDIM_KR_D ) \
|
||||
/ BLIS_DEFAULT_KR_D )
|
||||
|
||||
|
||||
// Now, we compute the size of each block/panel of A, B, and C for each
|
||||
// datatype.
|
||||
|
||||
@@ -168,19 +210,12 @@
|
||||
//
|
||||
// Compute memory pool block sizes for single real.
|
||||
//
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_S ( BLIS_POOL_MC_S * \
|
||||
( BLIS_POOL_KC_S + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_S \
|
||||
) \
|
||||
) * \
|
||||
BLIS_POOL_KC_S * \
|
||||
BLIS_SIZEOF_S \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_S ( ( BLIS_POOL_KC_S + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_S \
|
||||
) \
|
||||
) * \
|
||||
#define BLIS_KN_BLOCK_SIZE_S ( BLIS_POOL_KC_S * \
|
||||
BLIS_POOL_NC_S * \
|
||||
BLIS_SIZEOF_S \
|
||||
)
|
||||
@@ -192,19 +227,12 @@
|
||||
//
|
||||
// Compute memory pool block sizes for double real.
|
||||
//
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_D ( BLIS_POOL_MC_D * \
|
||||
( BLIS_POOL_KC_D + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_D \
|
||||
) \
|
||||
) * \
|
||||
BLIS_POOL_KC_D * \
|
||||
BLIS_SIZEOF_D \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_D ( ( BLIS_POOL_KC_D + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_D \
|
||||
) \
|
||||
) * \
|
||||
#define BLIS_KN_BLOCK_SIZE_D ( BLIS_POOL_KC_D * \
|
||||
BLIS_POOL_NC_D * \
|
||||
BLIS_SIZEOF_D \
|
||||
)
|
||||
@@ -216,19 +244,12 @@
|
||||
//
|
||||
// Compute memory pool block sizes for single complex.
|
||||
//
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_C ( BLIS_POOL_MC_C * \
|
||||
( BLIS_POOL_KC_C + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_C \
|
||||
) \
|
||||
) * \
|
||||
BLIS_POOL_KC_C * \
|
||||
BLIS_SIZEOF_C \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_C ( ( BLIS_POOL_KC_C + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_C \
|
||||
) \
|
||||
) * \
|
||||
#define BLIS_KN_BLOCK_SIZE_C ( BLIS_POOL_KC_C * \
|
||||
BLIS_POOL_NC_C * \
|
||||
BLIS_SIZEOF_C \
|
||||
)
|
||||
@@ -238,21 +259,14 @@
|
||||
)
|
||||
|
||||
//
|
||||
// Compute memory pool block sizes for single complex.
|
||||
// Compute memory pool block sizes for double complex.
|
||||
//
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_Z ( BLIS_POOL_MC_Z * \
|
||||
( BLIS_POOL_KC_Z + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_Z \
|
||||
) \
|
||||
) * \
|
||||
BLIS_POOL_KC_Z * \
|
||||
BLIS_SIZEOF_Z \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_Z ( ( BLIS_POOL_KC_Z + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_Z \
|
||||
) \
|
||||
) * \
|
||||
#define BLIS_KN_BLOCK_SIZE_Z ( BLIS_POOL_KC_Z * \
|
||||
BLIS_POOL_NC_Z * \
|
||||
BLIS_SIZEOF_Z \
|
||||
)
|
||||
@@ -261,6 +275,90 @@
|
||||
BLIS_SIZEOF_Z \
|
||||
)
|
||||
|
||||
//
|
||||
// Compute memory pool block sizes for single complex (4m).
// Each size is the product of two BLIS_POOL_4M_*_C dimensions and
// BLIS_SIZEOF_C: MK uses MC x KC, KN uses KC x NC, MN uses MC x NC.
|
||||
//
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_4M_C ( BLIS_POOL_4M_MC_C * \
|
||||
BLIS_POOL_4M_KC_C * \
|
||||
BLIS_SIZEOF_C \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_4M_C ( BLIS_POOL_4M_KC_C * \
|
||||
BLIS_POOL_4M_NC_C * \
|
||||
BLIS_SIZEOF_C \
|
||||
)
|
||||
#define BLIS_MN_BLOCK_SIZE_4M_C ( BLIS_POOL_4M_MC_C * \
|
||||
BLIS_POOL_4M_NC_C * \
|
||||
BLIS_SIZEOF_C \
|
||||
)
|
||||
|
||||
//
|
||||
// Compute memory pool block sizes for double complex (4m).
// Each size is the product of two BLIS_POOL_4M_*_Z dimensions and
// BLIS_SIZEOF_Z: MK uses MC x KC, KN uses KC x NC, MN uses MC x NC.
|
||||
//
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_4M_Z ( BLIS_POOL_4M_MC_Z * \
|
||||
BLIS_POOL_4M_KC_Z * \
|
||||
BLIS_SIZEOF_Z \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_4M_Z ( BLIS_POOL_4M_KC_Z * \
|
||||
BLIS_POOL_4M_NC_Z * \
|
||||
BLIS_SIZEOF_Z \
|
||||
)
|
||||
#define BLIS_MN_BLOCK_SIZE_4M_Z ( BLIS_POOL_4M_MC_Z * \
|
||||
BLIS_POOL_4M_NC_Z * \
|
||||
BLIS_SIZEOF_Z \
|
||||
)
|
||||
|
||||
//
|
||||
// Compute memory pool block sizes for single complex (3m).
// Each size is the product of two BLIS_POOL_3M_*_C dimensions and a scaled
// element size: MK uses MC x KC, KN uses KC x NC, MN uses MC x NC.
|
||||
//
|
||||
|
||||
// NOTE: We scale by 3/2 because 3m requires 50% more space than 4m.
// The factor is written as ( BLIS_SIZEOF_C * 3 ) followed by / 2. By C
// operator precedence the division applies last, to the whole product, so an
// integer 3/2 is never evaluated on its own (which would truncate to 1).
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_3M_C ( BLIS_POOL_3M_MC_C * \
|
||||
BLIS_POOL_3M_KC_C * \
|
||||
( BLIS_SIZEOF_C * \
|
||||
3 \
|
||||
) / 2 \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_3M_C ( BLIS_POOL_3M_KC_C * \
|
||||
BLIS_POOL_3M_NC_C * \
|
||||
( BLIS_SIZEOF_C * \
|
||||
3 \
|
||||
) / 2 \
|
||||
)
|
||||
#define BLIS_MN_BLOCK_SIZE_3M_C ( BLIS_POOL_3M_MC_C * \
|
||||
BLIS_POOL_3M_NC_C * \
|
||||
( BLIS_SIZEOF_C * \
|
||||
3 \
|
||||
) / 2 \
|
||||
)
|
||||
|
||||
//
|
||||
// Compute memory pool block sizes for double complex (3m).
// Each size is the product of two BLIS_POOL_3M_*_Z dimensions and a scaled
// element size: MK uses MC x KC, KN uses KC x NC, MN uses MC x NC.
|
||||
//
|
||||
|
||||
// NOTE: We scale by 3/2 because 3m requires 50% more space than 4m.
// The factor is written as ( BLIS_SIZEOF_Z * 3 ) followed by / 2. By C
// operator precedence the division applies last, to the whole product, so an
// integer 3/2 is never evaluated on its own (which would truncate to 1).
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_3M_Z ( BLIS_POOL_3M_MC_Z * \
|
||||
BLIS_POOL_3M_KC_Z * \
|
||||
( BLIS_SIZEOF_Z * \
|
||||
3 \
|
||||
) / 2 \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_3M_Z ( BLIS_POOL_3M_KC_Z * \
|
||||
BLIS_POOL_3M_NC_Z * \
|
||||
( BLIS_SIZEOF_Z * \
|
||||
3 \
|
||||
) / 2 \
|
||||
)
|
||||
#define BLIS_MN_BLOCK_SIZE_3M_Z ( BLIS_POOL_3M_MC_Z * \
|
||||
BLIS_POOL_3M_NC_Z * \
|
||||
( BLIS_SIZEOF_Z * \
|
||||
3 \
|
||||
) / 2 \
|
||||
)
|
||||
|
||||
|
||||
// -- Maximum block size search ------------------------------------------------
|
||||
|
||||
@@ -283,6 +381,22 @@
|
||||
#undef BLIS_MK_BLOCK_SIZE
|
||||
#define BLIS_MK_BLOCK_SIZE BLIS_MK_BLOCK_SIZE_Z
|
||||
#endif
|
||||
// Continue the running-maximum search with the 4m and 3m candidates: each
// test redefines BLIS_MK_BLOCK_SIZE to the candidate only when the candidate
// is strictly larger than the current value. After all four tests
// BLIS_MK_BLOCK_SIZE is at least as large as every 4m/3m MK block size.
#if BLIS_MK_BLOCK_SIZE_4M_C > BLIS_MK_BLOCK_SIZE
|
||||
#undef BLIS_MK_BLOCK_SIZE
|
||||
#define BLIS_MK_BLOCK_SIZE BLIS_MK_BLOCK_SIZE_4M_C
|
||||
#endif
|
||||
#if BLIS_MK_BLOCK_SIZE_4M_Z > BLIS_MK_BLOCK_SIZE
|
||||
#undef BLIS_MK_BLOCK_SIZE
|
||||
#define BLIS_MK_BLOCK_SIZE BLIS_MK_BLOCK_SIZE_4M_Z
|
||||
#endif
|
||||
#if BLIS_MK_BLOCK_SIZE_3M_C > BLIS_MK_BLOCK_SIZE
|
||||
#undef BLIS_MK_BLOCK_SIZE
|
||||
#define BLIS_MK_BLOCK_SIZE BLIS_MK_BLOCK_SIZE_3M_C
|
||||
#endif
|
||||
#if BLIS_MK_BLOCK_SIZE_3M_Z > BLIS_MK_BLOCK_SIZE
|
||||
#undef BLIS_MK_BLOCK_SIZE
|
||||
#define BLIS_MK_BLOCK_SIZE BLIS_MK_BLOCK_SIZE_3M_Z
|
||||
#endif
|
||||
|
||||
//
|
||||
// Find the largest block size for panels of B.
|
||||
@@ -300,6 +414,22 @@
|
||||
#undef BLIS_KN_BLOCK_SIZE
|
||||
#define BLIS_KN_BLOCK_SIZE BLIS_KN_BLOCK_SIZE_Z
|
||||
#endif
|
||||
// Continue the running-maximum search with the 4m and 3m candidates: each
// test redefines BLIS_KN_BLOCK_SIZE to the candidate only when the candidate
// is strictly larger than the current value. After all four tests
// BLIS_KN_BLOCK_SIZE is at least as large as every 4m/3m KN block size.
#if BLIS_KN_BLOCK_SIZE_4M_C > BLIS_KN_BLOCK_SIZE
|
||||
#undef BLIS_KN_BLOCK_SIZE
|
||||
#define BLIS_KN_BLOCK_SIZE BLIS_KN_BLOCK_SIZE_4M_C
|
||||
#endif
|
||||
#if BLIS_KN_BLOCK_SIZE_4M_Z > BLIS_KN_BLOCK_SIZE
|
||||
#undef BLIS_KN_BLOCK_SIZE
|
||||
#define BLIS_KN_BLOCK_SIZE BLIS_KN_BLOCK_SIZE_4M_Z
|
||||
#endif
|
||||
#if BLIS_KN_BLOCK_SIZE_3M_C > BLIS_KN_BLOCK_SIZE
|
||||
#undef BLIS_KN_BLOCK_SIZE
|
||||
#define BLIS_KN_BLOCK_SIZE BLIS_KN_BLOCK_SIZE_3M_C
|
||||
#endif
|
||||
#if BLIS_KN_BLOCK_SIZE_3M_Z > BLIS_KN_BLOCK_SIZE
|
||||
#undef BLIS_KN_BLOCK_SIZE
|
||||
#define BLIS_KN_BLOCK_SIZE BLIS_KN_BLOCK_SIZE_3M_Z
|
||||
#endif
|
||||
|
||||
//
|
||||
// Find the largest block size for panels of C.
|
||||
@@ -317,6 +447,22 @@
|
||||
#undef BLIS_MN_BLOCK_SIZE
|
||||
#define BLIS_MN_BLOCK_SIZE BLIS_MN_BLOCK_SIZE_Z
|
||||
#endif
|
||||
// Continue the running-maximum search with the 4m and 3m candidates: each
// test redefines BLIS_MN_BLOCK_SIZE to the candidate only when the candidate
// is strictly larger than the current value. After all four tests
// BLIS_MN_BLOCK_SIZE is at least as large as every 4m/3m MN block size.
#if BLIS_MN_BLOCK_SIZE_4M_C > BLIS_MN_BLOCK_SIZE
|
||||
#undef BLIS_MN_BLOCK_SIZE
|
||||
#define BLIS_MN_BLOCK_SIZE BLIS_MN_BLOCK_SIZE_4M_C
|
||||
#endif
|
||||
#if BLIS_MN_BLOCK_SIZE_4M_Z > BLIS_MN_BLOCK_SIZE
|
||||
#undef BLIS_MN_BLOCK_SIZE
|
||||
#define BLIS_MN_BLOCK_SIZE BLIS_MN_BLOCK_SIZE_4M_Z
|
||||
#endif
|
||||
#if BLIS_MN_BLOCK_SIZE_3M_C > BLIS_MN_BLOCK_SIZE
|
||||
#undef BLIS_MN_BLOCK_SIZE
|
||||
#define BLIS_MN_BLOCK_SIZE BLIS_MN_BLOCK_SIZE_3M_C
|
||||
#endif
|
||||
#if BLIS_MN_BLOCK_SIZE_3M_Z > BLIS_MN_BLOCK_SIZE
|
||||
#undef BLIS_MN_BLOCK_SIZE
|
||||
#define BLIS_MN_BLOCK_SIZE BLIS_MN_BLOCK_SIZE_3M_Z
|
||||
#endif
|
||||
|
||||
|
||||
// -- Compute pool sizes -------------------------------------------------------
|
||||
|
||||
@@ -232,9 +232,9 @@
|
||||
(obj).info = ( (obj).info & ~BLIS_TRANS_BIT ) | (trans); \
|
||||
}
|
||||
|
||||
#define bli_obj_set_conj( conj, obj ) \
|
||||
#define bli_obj_set_conj( conjval, obj ) \
|
||||
{ \
|
||||
(obj).info = ( (obj).info & ~BLIS_CONJ_BIT ) | (conj); \
|
||||
(obj).info = ( (obj).info & ~BLIS_CONJ_BIT ) | (conjval); \
|
||||
}
|
||||
|
||||
#define bli_obj_set_uplo( uplo, obj ) \
|
||||
@@ -329,9 +329,9 @@
|
||||
(obj).info = ( (obj).info ^ (trans) ); \
|
||||
}
|
||||
|
||||
#define bli_obj_apply_conj( conj, obj )\
|
||||
#define bli_obj_apply_conj( conjval, obj )\
|
||||
{ \
|
||||
(obj).info = ( (obj).info ^ (conj) ); \
|
||||
(obj).info = ( (obj).info ^ (conjval) ); \
|
||||
}
|
||||
|
||||
|
||||
@@ -842,10 +842,10 @@ bli_obj_width_stored( obj )
|
||||
|
||||
// Create an alias with a conj value applied.
|
||||
|
||||
#define bli_obj_alias_with_conj( conj, a, b ) \
|
||||
#define bli_obj_alias_with_conj( conja, a, b ) \
|
||||
{ \
|
||||
bli_obj_alias_to( a, b ); \
|
||||
bli_obj_apply_conj( conj, b ); \
|
||||
bli_obj_apply_conj( conja, b ); \
|
||||
}
|
||||
|
||||
|
||||
@@ -878,6 +878,19 @@ bli_obj_width_stored( obj )
|
||||
bli_obj_pack_status( obj ) == BLIS_PACKED_COL_PANELS )
|
||||
|
||||
|
||||
// Check if an object is packed for 4m/3m
|
||||
|
||||
#define bli_obj_is_panel_packed_4m( obj ) \
|
||||
\
|
||||
( bli_obj_pack_status( obj ) == BLIS_PACKED_ROW_PANELS_4M || \
|
||||
bli_obj_pack_status( obj ) == BLIS_PACKED_COL_PANELS_4M )
|
||||
|
||||
#define bli_obj_is_panel_packed_3m( obj ) \
|
||||
\
|
||||
( bli_obj_pack_status( obj ) == BLIS_PACKED_ROW_PANELS_3M || \
|
||||
bli_obj_pack_status( obj ) == BLIS_PACKED_COL_PANELS_3M )
|
||||
|
||||
|
||||
// Release object's pack (and cast) memory entries back to memory manager
|
||||
|
||||
#define bli_obj_release_pack( obj_p ) \
|
||||
|
||||
@@ -199,25 +199,25 @@
|
||||
|
||||
// conj
|
||||
|
||||
#define bli_is_noconj( conj ) \
|
||||
#define bli_is_noconj( conjval ) \
|
||||
\
|
||||
( conj == BLIS_NO_CONJUGATE )
|
||||
( conjval == BLIS_NO_CONJUGATE )
|
||||
|
||||
#define bli_is_conj( conj ) \
|
||||
#define bli_is_conj( conjval ) \
|
||||
\
|
||||
( conj == BLIS_CONJUGATE )
|
||||
( conjval == BLIS_CONJUGATE )
|
||||
|
||||
#define bli_conj_toggled( conj ) \
|
||||
#define bli_conj_toggled( conjval ) \
|
||||
\
|
||||
( conj ^ BLIS_CONJ_BIT )
|
||||
( conjval ^ BLIS_CONJ_BIT )
|
||||
|
||||
#define bli_apply_conj( conjapp, conj )\
|
||||
#define bli_apply_conj( conjapp, conjval )\
|
||||
\
|
||||
( conj ^ (conjapp) )
|
||||
( conjval ^ (conjapp) )
|
||||
|
||||
#define bli_toggle_conj( conj ) \
|
||||
#define bli_toggle_conj( conjval ) \
|
||||
{ \
|
||||
conj = bli_conj_toggled( conj ); \
|
||||
conjval = bli_conj_toggled( conjval ); \
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -83,6 +83,9 @@
|
||||
#include "bli_addjris.h"
|
||||
#include "bli_addjs.h"
|
||||
|
||||
#include "bli_add3ris.h"
|
||||
#include "bli_add3s.h"
|
||||
|
||||
#include "bli_axpyris.h"
|
||||
#include "bli_axpys.h"
|
||||
#include "bli_axpyjris.h"
|
||||
@@ -103,6 +106,9 @@
|
||||
#include "bli_copycjris.h"
|
||||
#include "bli_copycjs.h"
|
||||
|
||||
#include "bli_copyri3s.h"
|
||||
#include "bli_copyjri3s.h"
|
||||
|
||||
#include "bli_dots.h"
|
||||
#include "bli_dotjs.h"
|
||||
|
||||
@@ -129,6 +135,16 @@
|
||||
#include "bli_scal2jris.h"
|
||||
#include "bli_scal2js.h"
|
||||
|
||||
#include "bli_scal2ri3s.h"
|
||||
#include "bli_scal2jri3s.h"
|
||||
|
||||
#include "bli_set0ris.h"
|
||||
#include "bli_set0s.h"
|
||||
|
||||
#include "bli_set1s.h"
|
||||
|
||||
#include "bli_seti0s.h"
|
||||
|
||||
#include "bli_sqrt2ris.h"
|
||||
#include "bli_sqrt2s.h"
|
||||
|
||||
@@ -164,6 +180,8 @@
|
||||
#include "bli_xpbys_mxn.h"
|
||||
#include "bli_xpbys_mxn_uplo.h"
|
||||
|
||||
#include "bli_scalris_mxn_uplo.h"
|
||||
|
||||
|
||||
// -- Miscellaneous macros --
|
||||
|
||||
|
||||
@@ -244,6 +244,10 @@ typedef struct
|
||||
#define BLIS_BITVAL_PACKED_COLUMNS 0x30000
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS 0x40000
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS 0x50000
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_4M 0x60000
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_4M 0x70000
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_3M 0x80000
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_3M 0x90000
|
||||
#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0
|
||||
#define BLIS_BITVAL_PACK_REV_IF_UPPER 0x100000
|
||||
#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0
|
||||
@@ -346,13 +350,17 @@ typedef enum
|
||||
|
||||
typedef enum
|
||||
{
|
||||
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
|
||||
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
|
||||
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
|
||||
BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
|
||||
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
|
||||
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
|
||||
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
|
||||
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
|
||||
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
|
||||
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
|
||||
BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
|
||||
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
|
||||
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
|
||||
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
|
||||
BLIS_PACKED_ROW_PANELS_4M = BLIS_BITVAL_PACKED_ROW_PANELS_4M,
|
||||
BLIS_PACKED_COL_PANELS_4M = BLIS_BITVAL_PACKED_COL_PANELS_4M,
|
||||
BLIS_PACKED_ROW_PANELS_3M = BLIS_BITVAL_PACKED_ROW_PANELS_3M,
|
||||
BLIS_PACKED_COL_PANELS_3M = BLIS_BITVAL_PACKED_COL_PANELS_3M,
|
||||
} pack_t;
|
||||
|
||||
|
||||
@@ -460,10 +468,10 @@ typedef struct func_s
|
||||
- 3 == packed by columns
|
||||
- 4 == packed by row panels
|
||||
- 5 == packed by column panels
|
||||
- 6 == unused
|
||||
- 7 == unused
|
||||
- 8 == unused
|
||||
- 9 == unused
|
||||
- 6 == packed by row panels (4m)
|
||||
- 7 == packed by column panels (4m)
|
||||
- 8 == packed by row panels (3m)
|
||||
- 9 == packed by column panels (3m)
|
||||
20 Packed panel order if upper-stored
|
||||
- 0 == forward order if upper
|
||||
- 1 == reverse order if upper
|
||||
|
||||
@@ -78,8 +78,12 @@ extern "C" {
|
||||
|
||||
#include "bli_kernel.h"
|
||||
#include "bli_kernel_type_defs.h"
|
||||
#include "bli_kernel_pre_macro_defs.h"
|
||||
#include "bli_kernel_macro_defs.h"
|
||||
#include "bli_kernel_4m_macro_defs.h"
|
||||
#include "bli_kernel_3m_macro_defs.h"
|
||||
#include "bli_kernel_post_macro_defs.h"
|
||||
#include "bli_kernel_prototypes.h"
|
||||
|
||||
|
||||
// -- BLIS memory pool definitions --
|
||||
|
||||
@@ -41,6 +41,8 @@
|
||||
// - The first char encodes the type of x.
|
||||
// - The second char encodes the type of a.
|
||||
|
||||
#ifndef BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
#define bli_ssabval2s( x, a ) bli_sabval2ris( bli_sreal(x), bli_simag(x), bli_sreal(a), bli_simag(a) )
|
||||
#define bli_dsabval2s( x, a ) bli_sabval2ris( bli_dreal(x), bli_dimag(x), bli_sreal(a), bli_simag(a) )
|
||||
#define bli_csabval2s( x, a ) bli_sabval2ris( bli_creal(x), bli_cimag(x), bli_sreal(a), bli_simag(a) )
|
||||
@@ -51,8 +53,6 @@
|
||||
#define bli_cdabval2s( x, a ) bli_dabval2ris( bli_creal(x), bli_cimag(x), bli_dreal(a), bli_dimag(a) )
|
||||
#define bli_zdabval2s( x, a ) bli_dabval2ris( bli_zreal(x), bli_zimag(x), bli_dreal(a), bli_dimag(a) )
|
||||
|
||||
#ifndef BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
#define bli_scabval2s( x, a ) bli_cabval2ris( bli_sreal(x), bli_simag(x), bli_creal(a), bli_cimag(a) )
|
||||
#define bli_dcabval2s( x, a ) bli_cabval2ris( bli_dreal(x), bli_dimag(x), bli_creal(a), bli_cimag(a) )
|
||||
#define bli_ccabval2s( x, a ) bli_cabval2ris( bli_creal(x), bli_cimag(x), bli_creal(a), bli_cimag(a) )
|
||||
|
||||
192
frame/include/level0/bli_add3s.h
Normal file
192
frame/include/level0/bli_add3s.h
Normal file
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_ADD3S_H
|
||||
#define BLIS_ADD3S_H
|
||||
|
||||
// add3s
|
||||
|
||||
// Notes:
|
||||
// - The first char encodes the type of a.
|
||||
// - The second char encodes the type of b.
|
||||
// - The third char encodes the type of c.
|
||||
|
||||
|
||||
// -- (abc) = (??s) ------------------------------------------------------------
|
||||
|
||||
#define bli_sssadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_dssadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_cssadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_zssadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) )
|
||||
|
||||
#define bli_sdsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_ddsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_cdsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_zdsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) )
|
||||
|
||||
#define bli_scsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_dcsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_ccsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_zcsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) )
|
||||
|
||||
#define bli_szsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_dzsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_czsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) )
|
||||
#define bli_zzsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) )
|
||||
|
||||
// -- (abc) = (??d) ------------------------------------------------------------
|
||||
|
||||
#define bli_ssdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_dsdadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_csdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_zsdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) )
|
||||
|
||||
#define bli_sddadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_dddadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_cddadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_zddadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
|
||||
#define bli_scdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_dcdadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_ccdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_zcdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
|
||||
#define bli_szdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_dzdadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_czdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
#define bli_zzdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) )
|
||||
|
||||
#ifndef BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
// -- (abc) = (??c) ------------------------------------------------------------
|
||||
|
||||
// NOTE(review): the variants below whose a and b are both real (ssc, dsc, sdc,
// ddc) pass c's real and imaginary parts to the real-domain bli_sadd3ris,
// whereas the variants with a complex a use bli_cadd3ris. When
// BLIS_ENABLE_C99_COMPLEX is defined, these same names expand to
// { (c) = (a) + (b); }, which assigns the whole of c. If bli_sadd3ris only
// writes its real output (its definition is not visible here -- TODO confirm
// in bli_add3ris.h), c's imaginary part is left unchanged on this path, which
// would differ from the C99 path.
#define bli_sscadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_dscadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_cscadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_zscadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) )
|
||||
|
||||
#define bli_sdcadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_ddcadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_cdcadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_zdcadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) )
|
||||
|
||||
#define bli_sccadd3s( a, b, c ) bli_cadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_dccadd3s( a, b, c ) bli_cadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_cccadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_zccadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) )
|
||||
|
||||
#define bli_szcadd3s( a, b, c ) bli_cadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_dzcadd3s( a, b, c ) bli_cadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_czcadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) )
|
||||
#define bli_zzcadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) )
|
||||
|
||||
// -- (abc) = (??z) ------------------------------------------------------------
|
||||
|
||||
// NOTE(review): the variants below whose a and b are both real (ssz, dsz, sdz,
// ddz) pass c's real and imaginary parts to the real-domain bli_dadd3ris,
// whereas the variants with a complex a use bli_zadd3ris. When
// BLIS_ENABLE_C99_COMPLEX is defined, these same names expand to
// { (c) = (a) + (b); }, which assigns the whole of c. If bli_dadd3ris only
// writes its real output (its definition is not visible here -- TODO confirm
// in bli_add3ris.h), c's imaginary part is left unchanged on this path, which
// would differ from the C99 path.
#define bli_sszadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_dszadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_cszadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_zszadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) )
|
||||
|
||||
#define bli_sdzadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_ddzadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_cdzadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_zdzadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
|
||||
#define bli_sczadd3s( a, b, c ) bli_zadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_dczadd3s( a, b, c ) bli_zadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_cczadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_zczadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
|
||||
#define bli_szzadd3s( a, b, c ) bli_zadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_dzzadd3s( a, b, c ) bli_zadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_czzadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
#define bli_zzzadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) )
|
||||
|
||||
#else // ifdef BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
// -- (abc) = (??c) ------------------------------------------------------------
|
||||
|
||||
#define bli_sscadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_dscadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_cscadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_zscadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
|
||||
#define bli_sdcadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_ddcadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_cdcadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_zdcadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
|
||||
#define bli_sccadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_dccadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_cccadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_zccadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
|
||||
#define bli_szcadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_dzcadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_czcadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_zzcadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
|
||||
// -- (abc) = (??z) ------------------------------------------------------------
|
||||
|
||||
#define bli_sszadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_dszadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_cszadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_zszadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
|
||||
#define bli_sdzadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_ddzadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_cdzadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_zdzadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
|
||||
#define bli_sczadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_dczadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_cczadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_zczadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
|
||||
#define bli_szzadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_dzzadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_czzadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
#define bli_zzzadd3s( a, b, c ) { (c) = (a) + (b); }
|
||||
|
||||
#endif // BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
|
||||
#define bli_sadd3s( a, b, c ) bli_sssadd3s( a, b, c )
|
||||
#define bli_dadd3s( a, b, c ) bli_dddadd3s( a, b, c )
|
||||
#define bli_cadd3s( a, b, c ) bli_cccadd3s( a, b, c )
|
||||
#define bli_zadd3s( a, b, c ) bli_zzzadd3s( a, b, c )
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -148,28 +148,5 @@
|
||||
( ( gint_t* ) bli_obj_buffer_for_const( BLIS_INT, BLIS_MINUS_TWO ) )
|
||||
|
||||
|
||||
// set to constant
|
||||
|
||||
// set1s
|
||||
|
||||
#define bli_sset1s( a ) bli_ssets( 1.0F, 0.0F, (a) )
|
||||
#define bli_dset1s( a ) bli_dsets( 1.0 , 0.0 , (a) )
|
||||
#define bli_cset1s( a ) bli_csets( 1.0F, 0.0F, (a) )
|
||||
#define bli_zset1s( a ) bli_zsets( 1.0 , 0.0 , (a) )
|
||||
|
||||
// set0s
|
||||
|
||||
#define bli_sset0s( a ) bli_ssets( 0.0F, 0.0F, (a) )
|
||||
#define bli_dset0s( a ) bli_dsets( 0.0 , 0.0 , (a) )
|
||||
#define bli_cset0s( a ) bli_csets( 0.0F, 0.0F, (a) )
|
||||
#define bli_zset0s( a ) bli_zsets( 0.0 , 0.0 , (a) )
|
||||
|
||||
// seti0s
|
||||
|
||||
#define bli_sseti0s( a ) bli_ssetis( 0.0F, (a) )
|
||||
#define bli_dseti0s( a ) bli_dsetis( 0.0 , (a) )
|
||||
#define bli_cseti0s( a ) bli_csetis( 0.0F, (a) )
|
||||
#define bli_zseti0s( a ) bli_zsetis( 0.0 , (a) )
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -41,51 +41,51 @@
|
||||
// - The first char encodes the type of x.
|
||||
// - The second char encodes the type of y.
|
||||
|
||||
#define bli_sscopycjs( conj, x, y ) bli_scopycjris( conj, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_dscopycjs( conj, x, y ) bli_scopycjris( conj, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_cscopycjs( conj, x, y ) bli_scopycjris( conj, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_zscopycjs( conj, x, y ) bli_scopycjris( conj, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_sscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_dscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_cscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_zscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )
|
||||
|
||||
#define bli_sdcopycjs( conj, x, y ) bli_dcopycjris( conj, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_ddcopycjs( conj, x, y ) bli_dcopycjris( conj, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_cdcopycjs( conj, x, y ) bli_dcopycjris( conj, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_zdcopycjs( conj, x, y ) bli_dcopycjris( conj, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_sdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_ddcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_cdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_zdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
|
||||
#ifndef BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
#define bli_sccopycjs( conj, x, y ) bli_ccopycjris( conj, bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) )
|
||||
#define bli_dccopycjs( conj, x, y ) bli_ccopycjris( conj, bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
|
||||
#define bli_cccopycjs( conj, x, y ) bli_ccopycjris( conj, bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
|
||||
#define bli_zccopycjs( conj, x, y ) bli_ccopycjris( conj, bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )
|
||||
#define bli_sccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) )
|
||||
#define bli_dccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
|
||||
#define bli_cccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
|
||||
#define bli_zccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )
|
||||
|
||||
#define bli_szcopycjs( conj, x, y ) bli_zcopycjris( conj, bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
|
||||
#define bli_dzcopycjs( conj, x, y ) bli_zcopycjris( conj, bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
|
||||
#define bli_czcopycjs( conj, x, y ) bli_zcopycjris( conj, bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
|
||||
#define bli_zzcopycjs( conj, x, y ) bli_zcopycjris( conj, bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
|
||||
#define bli_szcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
|
||||
#define bli_dzcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
|
||||
#define bli_czcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
|
||||
#define bli_zzcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
|
||||
|
||||
#else // ifdef BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
#define bli_sccopycjs( conj, x, y ) { (y) = (x); }
|
||||
#define bli_dccopycjs( conj, x, y ) { (y) = (x); }
|
||||
#define bli_cccopycjs( conj, x, y ) { (y) = ( bli_is_conj( conj ) ? conjf(x) : (x) ); }
|
||||
#define bli_zccopycjs( conj, x, y ) { (y) = ( bli_is_conj( conj ) ? conj (x) : (x) ); }
|
||||
#define bli_sccopycjs( conjx, x, y ) { (y) = (x); }
|
||||
#define bli_dccopycjs( conjx, x, y ) { (y) = (x); }
|
||||
#define bli_cccopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conjf(x) : (x) ); }
|
||||
#define bli_zccopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conj (x) : (x) ); }
|
||||
|
||||
#define bli_szcopycjs( conj, x, y ) { (y) = (x); }
|
||||
#define bli_dzcopycjs( conj, x, y ) { (y) = (x); }
|
||||
#define bli_czcopycjs( conj, x, y ) { (y) = ( bli_is_conj( conj ) ? conjf(x) : (x) ); }
|
||||
#define bli_zzcopycjs( conj, x, y ) { (y) = ( bli_is_conj( conj ) ? conj (x) : (x) ); }
|
||||
#define bli_szcopycjs( conjx, x, y ) { (y) = (x); }
|
||||
#define bli_dzcopycjs( conjx, x, y ) { (y) = (x); }
|
||||
#define bli_czcopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conjf(x) : (x) ); }
|
||||
#define bli_zzcopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conj (x) : (x) ); }
|
||||
|
||||
#endif // BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
|
||||
#define bli_iicopycjs( conj, x, y ) { (y) = ( gint_t ) (x); }
|
||||
#define bli_iicopycjs( conjx, x, y ) { (y) = ( gint_t ) (x); }
|
||||
|
||||
|
||||
#define bli_scopycjs( conj, x, y ) bli_sscopycjs( conj, x, y )
|
||||
#define bli_dcopycjs( conj, x, y ) bli_ddcopycjs( conj, x, y )
|
||||
#define bli_ccopycjs( conj, x, y ) bli_cccopycjs( conj, x, y )
|
||||
#define bli_zcopycjs( conj, x, y ) bli_zzcopycjs( conj, x, y )
|
||||
#define bli_icopycjs( conj, x, y ) bli_iicopycjs( conj, x, y )
|
||||
#define bli_scopycjs( conjx, x, y ) bli_sscopycjs( conjx, x, y )
|
||||
#define bli_dcopycjs( conjx, x, y ) bli_ddcopycjs( conjx, x, y )
|
||||
#define bli_ccopycjs( conjx, x, y ) bli_cccopycjs( conjx, x, y )
|
||||
#define bli_zcopycjs( conjx, x, y ) bli_zzcopycjs( conjx, x, y )
|
||||
#define bli_icopycjs( conjx, x, y ) bli_iicopycjs( conjx, x, y )
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -41,47 +41,47 @@
|
||||
// - The first char encodes the type of x.
|
||||
// - The second char encodes the type of y.
|
||||
|
||||
#define bli_ssscalcjs( conj, x, y ) bli_sscalcjris( conj, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_dsscalcjs( conj, x, y ) bli_sscalcjris( conj, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_csscalcjs( conj, x, y ) bli_sscalcjris( conj, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_zsscalcjs( conj, x, y ) bli_sscalcjris( conj, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_ssscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_dsscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_csscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
|
||||
#define bli_zsscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )
|
||||
|
||||
#define bli_sdscalcjs( conj, x, y ) bli_dscalcjris( conj, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_ddscalcjs( conj, x, y ) bli_dscalcjris( conj, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_cdscalcjs( conj, x, y ) bli_dscalcjris( conj, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_zdscalcjs( conj, x, y ) bli_dscalcjris( conj, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_sdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_ddscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_cdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
#define bli_zdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )
|
||||
|
||||
#ifndef BLIS_ENABLE_C99_COMPLEX

// bli_?cscalcjs / bli_?zscalcjs( conjx, x, y ), component-wise form.
//
// x and y are split with the bli_?real()/bli_?imag() accessors matching their
// type letters and handed, together with conjx, to the corresponding
// bli_*scalcjris() macro.

#define bli_scscalcjs( conjx, x, y )  bli_scscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) )
#define bli_dcscalcjs( conjx, x, y )  bli_scscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
#define bli_ccscalcjs( conjx, x, y )   bli_cscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
#define bli_zcscalcjs( conjx, x, y )   bli_cscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )

#define bli_szscalcjs( conjx, x, y )  bli_dzscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_dzscalcjs( conjx, x, y )  bli_dzscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_czscalcjs( conjx, x, y )   bli_zscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zzscalcjs( conjx, x, y )   bli_zscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )

#else // ifdef BLIS_ENABLE_C99_COMPLEX

// bli_?cscalcjs / bli_?zscalcjs( conjx, x, y ), C99 complex form.
//
// y is scaled in place with `*=`. The sc/dc/sz/dz variants multiply by x
// directly and do not use conjx. The cc/cz variants multiply by conjf(x), and
// the zc/zz variants by conj(x), when bli_is_conj( conjx ) is true, and by x
// otherwise.
//
// The first parameter must NOT be named conj: a parameter with that name
// would also replace the conj token of the `conj (x)` call below with the
// caller's argument during macro expansion.

#define bli_scscalcjs( conjx, x, y )  { (y) *= (x); }
#define bli_dcscalcjs( conjx, x, y )  { (y) *= (x); }
#define bli_ccscalcjs( conjx, x, y )  { (y) *= ( bli_is_conj( conjx ) ? conjf(x) : (x) ); }
#define bli_zcscalcjs( conjx, x, y )  { (y) *= ( bli_is_conj( conjx ) ? conj (x) : (x) ); }

#define bli_szscalcjs( conjx, x, y )  { (y) *= (x); }
#define bli_dzscalcjs( conjx, x, y )  { (y) *= (x); }
#define bli_czscalcjs( conjx, x, y )  { (y) *= ( bli_is_conj( conjx ) ? conjf(x) : (x) ); }
#define bli_zzscalcjs( conjx, x, y )  { (y) *= ( bli_is_conj( conjx ) ? conj (x) : (x) ); }

#endif // BLIS_ENABLE_C99_COMPLEX
|
||||
|
||||
|
||||
// bli_?scalcjs( conjx, x, y )
//
// Single-letter shorthands: each forwards its three arguments unchanged to
// the two-letter macro whose x and y type letters are both equal to it.
// Each shorthand is defined exactly once, with the first parameter spelled
// conjx to match the two-letter macros it forwards to.

#define bli_sscalcjs( conjx, x, y )  bli_ssscalcjs( conjx, x, y )
#define bli_dscalcjs( conjx, x, y )  bli_ddscalcjs( conjx, x, y )
#define bli_cscalcjs( conjx, x, y )  bli_ccscalcjs( conjx, x, y )
#define bli_zscalcjs( conjx, x, y )  bli_zzscalcjs( conjx, x, y )
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
44
frame/include/level0/bli_set0s.h
Normal file
44
frame/include/level0/bli_set0s.h
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_SET0S_H
#define BLIS_SET0S_H

// set0s
//
// bli_?set0s( a ): forwards a to bli_?sets() with the constant pair
// ( 0.0, 0.0 ) as the first two arguments. The s and c variants pass float
// literals (0.0F); the d and z variants pass double literals (0.0).

#define bli_sset0s( a )  bli_ssets( 0.0F, 0.0F, (a) )
#define bli_dset0s( a )  bli_dsets( 0.0 , 0.0 , (a) )
#define bli_cset0s( a )  bli_csets( 0.0F, 0.0F, (a) )
#define bli_zset0s( a )  bli_zsets( 0.0 , 0.0 , (a) )

#endif
|
||||
|
||||
44
frame/include/level0/bli_set1s.h
Normal file
44
frame/include/level0/bli_set1s.h
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_SET1S_H
#define BLIS_SET1S_H

// set1s
//
// bli_?set1s( a ): forwards a to bli_?sets() with the constant pair
// ( 1.0, 0.0 ) as the first two arguments. The s and c variants pass float
// literals (1.0F, 0.0F); the d and z variants pass double literals.

#define bli_sset1s( a )  bli_ssets( 1.0F, 0.0F, (a) )
#define bli_dset1s( a )  bli_dsets( 1.0 , 0.0 , (a) )
#define bli_cset1s( a )  bli_csets( 1.0F, 0.0F, (a) )
#define bli_zset1s( a )  bli_zsets( 1.0 , 0.0 , (a) )

#endif
|
||||
|
||||
44
frame/include/level0/bli_seti0s.h
Normal file
44
frame/include/level0/bli_seti0s.h
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_SETI0S_H
#define BLIS_SETI0S_H

// seti0s
//
// bli_?seti0s( a ): forwards a to bli_?setis() with a zero constant as the
// first argument. The s and c variants pass a float literal (0.0F); the d and
// z variants pass a double literal (0.0).

#define bli_sseti0s( a )  bli_ssetis( 0.0F, (a) )
#define bli_dseti0s( a )  bli_dsetis( 0.0 , (a) )
#define bli_cseti0s( a )  bli_csetis( 0.0F, (a) )
#define bli_zseti0s( a )  bli_zsetis( 0.0 , (a) )

#endif
|
||||
|
||||
81
frame/include/level0/old/bli_set0ris_mxn.h
Normal file
81
frame/include/level0/old/bli_set0ris_mxn.h
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_SET0RIS_MXN_H
#define BLIS_SET0RIS_MXN_H

// set0ris_mxn
//
// bli_?set0ris_mxn( m, n, ar, ai, rs_a, cs_a )
//
// Invokes bli_?set0ris() once for every (i,j) with 0 <= i < m and 0 <= j < n,
// passing the elements at offset i*rs_a + j*cs_a from ar and from ai. The
// outer loop runs over j, the inner loop over i.
//
// Every macro argument is parenthesized in the expansion so that compound
// expressions (e.g. `ld + 1` as a stride) are applied as a unit. The
// expansion declares its own locals named i and j, so arguments must not
// refer to caller variables with those names.

#define bli_sset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \
{ \
	dim_t i, j; \
\
	for ( j = 0; j < (n); ++j ) \
	for ( i = 0; i < (m); ++i ) \
	bli_sset0ris( *((ar) + i*(rs_a) + j*(cs_a)), \
	              *((ai) + i*(rs_a) + j*(cs_a)) ); \
}

#define bli_dset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \
{ \
	dim_t i, j; \
\
	for ( j = 0; j < (n); ++j ) \
	for ( i = 0; i < (m); ++i ) \
	bli_dset0ris( *((ar) + i*(rs_a) + j*(cs_a)), \
	              *((ai) + i*(rs_a) + j*(cs_a)) ); \
}

#define bli_cset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \
{ \
	dim_t i, j; \
\
	for ( j = 0; j < (n); ++j ) \
	for ( i = 0; i < (m); ++i ) \
	bli_cset0ris( *((ar) + i*(rs_a) + j*(cs_a)), \
	              *((ai) + i*(rs_a) + j*(cs_a)) ); \
}

#define bli_zset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \
{ \
	dim_t i, j; \
\
	for ( j = 0; j < (n); ++j ) \
	for ( i = 0; i < (m); ++i ) \
	bli_zset0ris( *((ar) + i*(rs_a) + j*(cs_a)), \
	              *((ai) + i*(rs_a) + j*(cs_a)) ); \
}

#endif
|
||||
63
frame/include/level0/ri/bli_add3ris.h
Normal file
63
frame/include/level0/ri/bli_add3ris.h
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_ADD3RIS_H
#define BLIS_ADD3RIS_H

// add3ris
//
// bli_?add3ris( ar, ai, br, bi, cr, ci )
//
// Stores the sum of a and b into c, with every operand given as separate
// real (..r) and imaginary (..i) parts:
//   - s, d variants: cr = ar + br; ai, bi and ci are not used.
//   - c, z variants: cr = ar + br and ci = ai + bi.
// cr (and ci) must be assignable expressions.

#define bli_sadd3ris( ar, ai, br, bi, cr, ci ) \
{ \
	(cr) = (ar) + (br); \
}

#define bli_dadd3ris( ar, ai, br, bi, cr, ci ) \
{ \
	(cr) = (ar) + (br); \
}

#define bli_cadd3ris( ar, ai, br, bi, cr, ci ) \
{ \
	(cr) = (ar) + (br); \
	(ci) = (ai) + (bi); \
}

#define bli_zadd3ris( ar, ai, br, bi, cr, ci ) \
{ \
	(cr) = (ar) + (br); \
	(ci) = (ai) + (bi); \
}

#endif
|
||||
|
||||
110
frame/include/level0/ri/bli_scalris_mxn_uplo.h
Normal file
110
frame/include/level0/ri/bli_scalris_mxn_uplo.h
Normal file
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_SCALRIS_MXN_UPLO_H
#define BLIS_SCALRIS_MXN_UPLO_H

// bli_?scalris_mxn_u / bli_?scalris_mxn_l
//   ( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x )
//
// Visits every (i,j) with 0 <= i < m and 0 <= j < n (outer loop over j, inner
// loop over i) and, for the selected elements, invokes bli_?scalris() with
// *(ar), *(ai) and the elements at offset i*rs_x + j*cs_x from xr and xi.
//   - _u variants select elements where j - i >= diagoff.
//   - _l variants select elements where j - i <= diagoff.
// ar and ai are dereferenced, so they must be pointers.
//
// Every macro argument is parenthesized in the expansion so that compound
// expressions (e.g. `ld + 1` as a stride) are applied as a unit. The
// expansion declares its own locals named i and j, so arguments must not
// refer to caller variables with those names.

// scalris_mxn_u

#define bli_cscalris_mxn_u( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \
{ \
	dim_t i, j; \
\
	for ( j = 0; j < (n); ++j ) \
	for ( i = 0; i < (m); ++i ) \
	{ \
		if ( (doff_t)j - (doff_t)i >= (diagoff) ) \
		{ \
			bli_cscalris( *(ar), \
			              *(ai), \
			              *((xr) + i*(rs_x) + j*(cs_x)), \
			              *((xi) + i*(rs_x) + j*(cs_x)) ); \
		} \
	} \
}

#define bli_zscalris_mxn_u( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \
{ \
	dim_t i, j; \
\
	for ( j = 0; j < (n); ++j ) \
	for ( i = 0; i < (m); ++i ) \
	{ \
		if ( (doff_t)j - (doff_t)i >= (diagoff) ) \
		{ \
			bli_zscalris( *(ar), \
			              *(ai), \
			              *((xr) + i*(rs_x) + j*(cs_x)), \
			              *((xi) + i*(rs_x) + j*(cs_x)) ); \
		} \
	} \
}

// scalris_mxn_l

#define bli_cscalris_mxn_l( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \
{ \
	dim_t i, j; \
\
	for ( j = 0; j < (n); ++j ) \
	for ( i = 0; i < (m); ++i ) \
	{ \
		if ( (doff_t)j - (doff_t)i <= (diagoff) ) \
		{ \
			bli_cscalris( *(ar), \
			              *(ai), \
			              *((xr) + i*(rs_x) + j*(cs_x)), \
			              *((xi) + i*(rs_x) + j*(cs_x)) ); \
		} \
	} \
}

#define bli_zscalris_mxn_l( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \
{ \
	dim_t i, j; \
\
	for ( j = 0; j < (n); ++j ) \
	for ( i = 0; i < (m); ++i ) \
	{ \
		if ( (doff_t)j - (doff_t)i <= (diagoff) ) \
		{ \
			bli_zscalris( *(ar), \
			              *(ai), \
			              *((xr) + i*(rs_x) + j*(cs_x)), \
			              *((xi) + i*(rs_x) + j*(cs_x)) ); \
		} \
	} \
}

#endif
|
||||
46
frame/include/level0/ri/bli_set0ris.h
Normal file
46
frame/include/level0/ri/bli_set0ris.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_SET0RIS_H
#define BLIS_SET0RIS_H

// set0ris
//
// bli_?set0ris( xr, xi ): forwards xr and xi to bli_?copyris() with the
// constant pair ( 0.0, 0.0 ) as the source arguments. The s and c variants
// pass float literals (0.0F); the d and z variants pass double literals.

#define bli_sset0ris( xr, xi )  bli_scopyris( 0.0F, 0.0F, (xr), (xi) )
#define bli_dset0ris( xr, xi )  bli_dcopyris( 0.0 , 0.0 , (xr), (xi) )
#define bli_cset0ris( xr, xi )  bli_ccopyris( 0.0F, 0.0F, (xr), (xi) )
#define bli_zset0ris( xr, xi )  bli_zcopyris( 0.0 , 0.0 , (xr), (xi) )

#endif
|
||||
|
||||
46
frame/include/level0/ri3/bli_copyjri3s.h
Normal file
46
frame/include/level0/ri3/bli_copyjri3s.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_COPYJRI3S_H
#define BLIS_COPYJRI3S_H

// copyjri3s
//
// bli_?copyjri3s( ar, ai, br, bi, bri ): forwards to bli_?copyri3s() with the
// imaginary source part negated, i.e. with ( ar, -ai ) as the source pair.
// br, bi and bri are passed through unchanged.

#define bli_scopyjri3s( ar, ai, br, bi, bri )  bli_scopyri3s( (ar), -(ai), (br), (bi), (bri) )
#define bli_dcopyjri3s( ar, ai, br, bi, bri )  bli_dcopyri3s( (ar), -(ai), (br), (bi), (bri) )
#define bli_ccopyjri3s( ar, ai, br, bi, bri )  bli_ccopyri3s( (ar), -(ai), (br), (bi), (bri) )
#define bli_zcopyjri3s( ar, ai, br, bi, bri )  bli_zcopyri3s( (ar), -(ai), (br), (bi), (bri) )

#endif
|
||||
|
||||
65
frame/include/level0/ri3/bli_copyri3s.h
Normal file
65
frame/include/level0/ri3/bli_copyri3s.h
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_COPYRI3S_H
#define BLIS_COPYRI3S_H

// copyri3s
//
// bli_?copyri3s( ar, ai, br, bi, bri )
//
// Copies a, given as separate real and imaginary parts, into b:
//   - s, d variants: br = ar; ai, bi and bri are not used.
//   - c, z variants: br = ar, bi = ai, and bri = ar + ai (the sum of the
//     real and imaginary parts).
// The destination arguments must be assignable expressions.

#define bli_scopyri3s( ar, ai, br, bi, bri ) \
{ \
	(br) = (ar); \
}

#define bli_dcopyri3s( ar, ai, br, bi, bri ) \
{ \
	(br) = (ar); \
}

#define bli_ccopyri3s( ar, ai, br, bi, bri ) \
{ \
	(br)  = (ar); \
	(bi)  = (ai); \
	(bri) = (ar) + (ai); \
}

#define bli_zcopyri3s( ar, ai, br, bi, bri ) \
{ \
	(br)  = (ar); \
	(bi)  = (ai); \
	(bri) = (ar) + (ai); \
}

#endif
|
||||
|
||||
79
frame/include/level0/ri3/bli_scal2jri3s.h
Normal file
79
frame/include/level0/ri3/bli_scal2jri3s.h
Normal file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_SCAL2JRI3S_H
#define BLIS_SCAL2JRI3S_H

// scal2jri3s
//
// bli_?scal2jri3s( ar, ai, xr, xi, yr, yi, yri )
//
// Stores into y the product of a and the conjugate of x, with all operands
// given as separate real and imaginary parts:
//   - s, d variants:   yr = ar * xr; the other arguments are not used.
//   - c, z variants:   yr = ar*xr + ai*xi, yi = ai*xr - ar*xi, i.e. the
//                      product (ar + i*ai) * (xr - i*xi); yri = yr + yi.
//   - sc, dz variants: a contributes only ar (ai is not used):
//                      yr = ar*xr, yi = ar * -(xi), yri = yr + yi.
//
// yr is assigned before yi is computed and arguments are evaluated more than
// once, so yr must not name the same object as any of ar, ai, xr or xi, and
// arguments should be free of side effects.

#define bli_sscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr) = (ar) * (xr); \
}

#define bli_dscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr) = (ar) * (xr); \
}

#define bli_cscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr)  = (ar) * (xr) + (ai) * (xi); \
	(yi)  = (ai) * (xr) - (ar) * (xi); \
	(yri) = (yr) + (yi); \
}

#define bli_zscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr)  = (ar) * (xr) + (ai) * (xi); \
	(yi)  = (ai) * (xr) - (ar) * (xi); \
	(yri) = (yr) + (yi); \
}

#define bli_scscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr)  = (ar) *  (xr); \
	(yi)  = (ar) * -(xi); \
	(yri) = (yr) + (yi); \
}

#define bli_dzscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr)  = (ar) *  (xr); \
	(yi)  = (ar) * -(xi); \
	(yri) = (yr) + (yi); \
}

#endif
|
||||
|
||||
79
frame/include/level0/ri3/bli_scal2ri3s.h
Normal file
79
frame/include/level0/ri3/bli_scal2ri3s.h
Normal file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_SCAL2RI3S_H
#define BLIS_SCAL2RI3S_H

// scal2ri3s
//
// bli_?scal2ri3s( ar, ai, xr, xi, yr, yi, yri )
//
// Stores into y the product of a and x, with all operands given as separate
// real and imaginary parts:
//   - s, d variants:   yr = ar * xr; the other arguments are not used.
//   - c, z variants:   yr = ar*xr - ai*xi, yi = ai*xr + ar*xi, i.e. the
//                      product (ar + i*ai) * (xr + i*xi); yri = yr + yi.
//   - sc, dz variants: a contributes only ar (ai is not used):
//                      yr = ar*xr, yi = ar*xi, yri = yr + yi.
//
// yr is assigned before yi is computed and arguments are evaluated more than
// once, so yr must not name the same object as any of ar, ai, xr or xi, and
// arguments should be free of side effects.

#define bli_sscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr) = (ar) * (xr); \
}

#define bli_dscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr) = (ar) * (xr); \
}

#define bli_cscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr)  = (ar) * (xr) - (ai) * (xi); \
	(yi)  = (ai) * (xr) + (ar) * (xi); \
	(yri) = (yr) + (yi); \
}

#define bli_zscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr)  = (ar) * (xr) - (ai) * (xi); \
	(yi)  = (ai) * (xr) + (ar) * (xi); \
	(yri) = (yr) + (yi); \
}

#define bli_scscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr)  = (ar) * (xr); \
	(yi)  = (ar) * (xi); \
	(yri) = (yr) + (yi); \
}

#define bli_dzscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \
{ \
	(yr)  = (ar) * (xr); \
	(yi)  = (ar) * (xi); \
	(yri) = (yr) + (yi); \
}

#endif
|
||||
|
||||
Reference in New Issue
Block a user