mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Details:
- Standard names for reference kernels (levels-1v, -1f and 3) are now
macro constants. Examples:
BLIS_SAXPYV_KERNEL_REF
BLIS_DDOTXF_KERNEL_REF
BLIS_ZGEMM_UKERNEL_REF
- Developers no longer have to name all datatype instances of a kernel
with a common base name; [sdcz] datatype flavors of each kernel or
micro-kernel (level-1v, -1f, or 3) may now be named independently.
This means you can now, if you wish, encode the datatype-specific
register blocksizes in the name of the micro-kernel functions.
- Any datatype instances of any kernel (1v, 1f, or 3) that is left
undefined in bli_kernel.h will default to the corresponding reference
implementation. For example, if BLIS_DGEMM_UKERNEL is left undefined,
it will be defined to be BLIS_DGEMM_UKERNEL_REF.
- Developers no longer need to name level-1v/-1f kernels with multiple
datatype chars to match the number of types the kernel WOULD take in
a mixed type environment, as in bli_dddaxpyv_opt(). Now, one char is
sufficient, as in bli_daxpyv_opt().
- There is no longer a need to define an obj_t wrapper to go along with
your level-1v/-1f kernels. The framework now provides a _kernel()
function which serves as the obj_t wrapper for whatever kernels are
specified (or defaulted to) via bli_kernel.h.
- Developers no longer need to prototype their kernels, and thus no
longer need to include any prototyping headers from within
bli_kernel.h. The framework now generates kernel prototypes, with the
proper type signature, based on the kernel names defined (or defaulted
to) via bli_kernel.h.
- If the complex datatype x (of [cz]) implementation of the gemm micro-
kernel is left undefined by bli_kernel.h, but its same-precision real
domain equivalent IS defined, BLIS will use a 4m-based implementation
for the datatype x implementations of all level-3 operations, using
only the real gemm micro-kernel.
343 lines
10 KiB
C
343 lines
10 KiB
C
/*
|
|
|
|
BLIS
|
|
An object-based framework for developing high-performance BLAS-like
|
|
libraries.
|
|
|
|
Copyright (C) 2014, The University of Texas
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are
|
|
met:
|
|
- Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
- Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
- Neither the name of The University of Texas nor the names of its
|
|
contributors may be used to endorse or promote products derived
|
|
from this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
#ifndef BLIS_KERNEL_POST_MACRO_DEFS_H
|
|
#define BLIS_KERNEL_POST_MACRO_DEFS_H
|
|
|
|
/*
|
|
// -- Define PASTEMAC-friendly kernel function name macros ---------------------
|
|
|
|
//
|
|
// Level-3
|
|
//
|
|
|
|
// gemm micro-kernels
|
|
|
|
#define bli_sGEMM_UKERNEL BLIS_SGEMM_UKERNEL
|
|
#define bli_dGEMM_UKERNEL BLIS_DGEMM_UKERNEL
|
|
#define bli_cGEMM_UKERNEL BLIS_CGEMM_UKERNEL
|
|
#define bli_zGEMM_UKERNEL BLIS_ZGEMM_UKERNEL
|
|
|
|
// gemmtrsm_l micro-kernels
|
|
|
|
#define bli_sGEMMTRSM_L_UKERNEL BLIS_SGEMMTRSM_L_UKERNEL
|
|
#define bli_dGEMMTRSM_L_UKERNEL BLIS_DGEMMTRSM_L_UKERNEL
|
|
#define bli_cGEMMTRSM_L_UKERNEL BLIS_CGEMMTRSM_L_UKERNEL
|
|
#define bli_zGEMMTRSM_L_UKERNEL BLIS_ZGEMMTRSM_L_UKERNEL
|
|
|
|
// gemmtrsm_u micro-kernels
|
|
|
|
#define bli_sGEMMTRSM_U_UKERNEL BLIS_SGEMMTRSM_U_UKERNEL
|
|
#define bli_dGEMMTRSM_U_UKERNEL BLIS_DGEMMTRSM_U_UKERNEL
|
|
#define bli_cGEMMTRSM_U_UKERNEL BLIS_CGEMMTRSM_U_UKERNEL
|
|
#define bli_zGEMMTRSM_U_UKERNEL BLIS_ZGEMMTRSM_U_UKERNEL
|
|
|
|
// trsm_l micro-kernels
|
|
|
|
#define bli_sTRSM_L_UKERNEL BLIS_STRSM_L_UKERNEL
|
|
#define bli_dTRSM_L_UKERNEL BLIS_DTRSM_L_UKERNEL
|
|
#define bli_cTRSM_L_UKERNEL BLIS_CTRSM_L_UKERNEL
|
|
#define bli_zTRSM_L_UKERNEL BLIS_ZTRSM_L_UKERNEL
|
|
|
|
// trsm_u micro-kernels
|
|
|
|
#define bli_sTRSM_U_UKERNEL BLIS_STRSM_U_UKERNEL
|
|
#define bli_dTRSM_U_UKERNEL BLIS_DTRSM_U_UKERNEL
|
|
#define bli_cTRSM_U_UKERNEL BLIS_CTRSM_U_UKERNEL
|
|
#define bli_zTRSM_U_UKERNEL BLIS_ZTRSM_U_UKERNEL
|
|
|
|
//
|
|
// Level-3 4m
|
|
//
|
|
|
|
// gemm4m micro-kernels
|
|
|
|
#define bli_cGEMM4M_UKERNEL BLIS_CGEMM4M_UKERNEL
|
|
#define bli_zGEMM4M_UKERNEL BLIS_ZGEMM4M_UKERNEL
|
|
|
|
// gemmtrsm4m_l micro-kernels
|
|
|
|
#define bli_cGEMMTRSM4M_L_UKERNEL BLIS_CGEMMTRSM4M_L_UKERNEL
|
|
#define bli_zGEMMTRSM4M_L_UKERNEL BLIS_ZGEMMTRSM4M_L_UKERNEL
|
|
|
|
// gemmtrsm4m_u micro-kernels
|
|
|
|
#define bli_cGEMMTRSM4M_U_UKERNEL BLIS_CGEMMTRSM4M_U_UKERNEL
|
|
#define bli_zGEMMTRSM4M_U_UKERNEL BLIS_ZGEMMTRSM4M_U_UKERNEL
|
|
|
|
// trsm4m_l micro-kernels
|
|
|
|
#define bli_cTRSM4M_L_UKERNEL BLIS_CTRSM4M_L_UKERNEL
|
|
#define bli_zTRSM4M_L_UKERNEL BLIS_ZTRSM4M_L_UKERNEL
|
|
|
|
// trsm4m_u micro-kernels
|
|
|
|
#define bli_cTRSM4M_U_UKERNEL BLIS_CTRSM4M_U_UKERNEL
|
|
#define bli_zTRSM4M_U_UKERNEL BLIS_ZTRSM4M_U_UKERNEL
|
|
|
|
//
|
|
// Level-3 3m
|
|
//
|
|
|
|
// gemm3m micro-kernels
|
|
|
|
#define bli_cGEMM3M_UKERNEL BLIS_CGEMM3M_UKERNEL
|
|
#define bli_zGEMM3M_UKERNEL BLIS_ZGEMM3M_UKERNEL
|
|
|
|
// gemmtrsm3m_l micro-kernels
|
|
|
|
#define bli_cGEMMTRSM3M_L_UKERNEL BLIS_CGEMMTRSM3M_L_UKERNEL
|
|
#define bli_zGEMMTRSM3M_L_UKERNEL BLIS_ZGEMMTRSM3M_L_UKERNEL
|
|
|
|
// gemmtrsm3m_u micro-kernels
|
|
|
|
#define bli_cGEMMTRSM3M_U_UKERNEL BLIS_CGEMMTRSM3M_U_UKERNEL
|
|
#define bli_zGEMMTRSM3M_U_UKERNEL BLIS_ZGEMMTRSM3M_U_UKERNEL
|
|
|
|
// trsm3m_l micro-kernels
|
|
|
|
#define bli_cTRSM3M_L_UKERNEL BLIS_CTRSM3M_L_UKERNEL
|
|
#define bli_zTRSM3M_L_UKERNEL BLIS_ZTRSM3M_L_UKERNEL
|
|
|
|
// trsm3m_u micro-kernels
|
|
|
|
#define bli_cTRSM3M_U_UKERNEL BLIS_CTRSM3M_U_UKERNEL
|
|
#define bli_zTRSM3M_U_UKERNEL BLIS_ZTRSM3M_U_UKERNEL
|
|
|
|
//
|
|
// Level-1m
|
|
//
|
|
|
|
// NOTE: We don't need any PASTEMAC-friendly aliases to packm kernel
|
|
// macros because they are used directly in the initialization of the
|
|
// function pointer array, rather than via a templatizing wrapper macro.
|
|
|
|
|
|
//
|
|
// Level-1f
|
|
//
|
|
|
|
// axpy2v kernels
|
|
|
|
#define bli_sssAXPY2V_KERNEL BLIS_SAXPY2V_KERNEL
|
|
#define bli_dddAXPY2V_KERNEL BLIS_DAXPY2V_KERNEL
|
|
#define bli_cccAXPY2V_KERNEL BLIS_CAXPY2V_KERNEL
|
|
#define bli_zzzAXPY2V_KERNEL BLIS_ZAXPY2V_KERNEL
|
|
|
|
// dotaxpyv kernels
|
|
|
|
#define bli_sssDOTAXPYV_KERNEL BLIS_SDOTAXPYV_KERNEL
|
|
#define bli_dddDOTAXPYV_KERNEL BLIS_DDOTAXPYV_KERNEL
|
|
#define bli_cccDOTAXPYV_KERNEL BLIS_CDOTAXPYV_KERNEL
|
|
#define bli_zzzDOTAXPYV_KERNEL BLIS_ZDOTAXPYV_KERNEL
|
|
|
|
// axpyf kernels
|
|
|
|
#define bli_sssAXPYF_KERNEL BLIS_SAXPYF_KERNEL
|
|
#define bli_dddAXPYF_KERNEL BLIS_DAXPYF_KERNEL
|
|
#define bli_cccAXPYF_KERNEL BLIS_CAXPYF_KERNEL
|
|
#define bli_zzzAXPYF_KERNEL BLIS_ZAXPYF_KERNEL
|
|
|
|
// dotxf kernels
|
|
|
|
#define bli_sssDOTXF_KERNEL BLIS_SDOTXF_KERNEL
|
|
#define bli_dddDOTXF_KERNEL BLIS_DDOTXF_KERNEL
|
|
#define bli_cccDOTXF_KERNEL BLIS_CDOTXF_KERNEL
|
|
#define bli_zzzDOTXF_KERNEL BLIS_ZDOTXF_KERNEL
|
|
|
|
// dotxaxpyf kernels
|
|
|
|
#define bli_sssDOTXAXPYF_KERNEL BLIS_SDOTXAXPYF_KERNEL
|
|
#define bli_dddDOTXAXPYF_KERNEL BLIS_DDOTXAXPYF_KERNEL
|
|
#define bli_cccDOTXAXPYF_KERNEL BLIS_CDOTXAXPYF_KERNEL
|
|
#define bli_zzzDOTXAXPYF_KERNEL BLIS_ZDOTXAXPYF_KERNEL
|
|
|
|
|
|
//
|
|
// Level-1v
|
|
//
|
|
|
|
// addv kernels
|
|
|
|
#define bli_ssADDV_KERNEL BLIS_SADDV_KERNEL
|
|
#define bli_ddADDV_KERNEL BLIS_DADDV_KERNEL
|
|
#define bli_ccADDV_KERNEL BLIS_CADDV_KERNEL
|
|
#define bli_zzADDV_KERNEL BLIS_ZADDV_KERNEL
|
|
|
|
// axpyv kernels
|
|
|
|
#define bli_sssAXPYV_KERNEL BLIS_SAXPYV_KERNEL
|
|
#define bli_dddAXPYV_KERNEL BLIS_DAXPYV_KERNEL
|
|
#define bli_cccAXPYV_KERNEL BLIS_CAXPYV_KERNEL
|
|
#define bli_zzzAXPYV_KERNEL BLIS_ZAXPYV_KERNEL
|
|
|
|
// copyv kernels
|
|
|
|
#define bli_ssCOPYV_KERNEL BLIS_SCOPYV_KERNEL
|
|
#define bli_ddCOPYV_KERNEL BLIS_DCOPYV_KERNEL
|
|
#define bli_ccCOPYV_KERNEL BLIS_CCOPYV_KERNEL
|
|
#define bli_zzCOPYV_KERNEL BLIS_ZCOPYV_KERNEL
|
|
|
|
// dotv kernels
|
|
|
|
#define bli_sssDOTV_KERNEL BLIS_SDOTV_KERNEL
|
|
#define bli_dddDOTV_KERNEL BLIS_DDOTV_KERNEL
|
|
#define bli_cccDOTV_KERNEL BLIS_CDOTV_KERNEL
|
|
#define bli_zzzDOTV_KERNEL BLIS_ZDOTV_KERNEL
|
|
|
|
// dotxv kernels
|
|
|
|
#define bli_sssDOTXV_KERNEL BLIS_SDOTXV_KERNEL
|
|
#define bli_dddDOTXV_KERNEL BLIS_DDOTXV_KERNEL
|
|
#define bli_cccDOTXV_KERNEL BLIS_CDOTXV_KERNEL
|
|
#define bli_zzzDOTXV_KERNEL BLIS_ZDOTXV_KERNEL
|
|
|
|
// invertv kernels
|
|
|
|
#define bli_sINVERTV_KERNEL BLIS_SINVERTV_KERNEL
|
|
#define bli_dINVERTV_KERNEL BLIS_DINVERTV_KERNEL
|
|
#define bli_cINVERTV_KERNEL BLIS_CINVERTV_KERNEL
|
|
#define bli_zINVERTV_KERNEL BLIS_ZINVERTV_KERNEL
|
|
|
|
// scal2v kernels
|
|
|
|
#define bli_sssSCAL2V_KERNEL BLIS_SSCAL2V_KERNEL
|
|
#define bli_dddSCAL2V_KERNEL BLIS_DSCAL2V_KERNEL
|
|
#define bli_cccSCAL2V_KERNEL BLIS_CSCAL2V_KERNEL
|
|
#define bli_zzzSCAL2V_KERNEL BLIS_ZSCAL2V_KERNEL
|
|
|
|
// scalv kernels
|
|
|
|
#define bli_ssSCALV_KERNEL BLIS_SSCALV_KERNEL
|
|
#define bli_ddSCALV_KERNEL BLIS_DSCALV_KERNEL
|
|
#define bli_ccSCALV_KERNEL BLIS_CSCALV_KERNEL
|
|
#define bli_zzSCALV_KERNEL BLIS_ZSCALV_KERNEL
|
|
|
|
// setv kernels
|
|
|
|
#define bli_ssSETV_KERNEL BLIS_SSETV_KERNEL
|
|
#define bli_ddSETV_KERNEL BLIS_DSETV_KERNEL
|
|
#define bli_ccSETV_KERNEL BLIS_CSETV_KERNEL
|
|
#define bli_zzSETV_KERNEL BLIS_ZSETV_KERNEL
|
|
|
|
// subv kernels
|
|
|
|
#define bli_ssSUBV_KERNEL BLIS_SSUBV_KERNEL
|
|
#define bli_ddSUBV_KERNEL BLIS_DSUBV_KERNEL
|
|
#define bli_ccSUBV_KERNEL BLIS_CSUBV_KERNEL
|
|
#define bli_zzSUBV_KERNEL BLIS_ZSUBV_KERNEL
|
|
|
|
// swapv kernels
|
|
|
|
#define bli_ssSWAPV_KERNEL BLIS_SSWAPV_KERNEL
|
|
#define bli_ddSWAPV_KERNEL BLIS_DSWAPV_KERNEL
|
|
#define bli_ccSWAPV_KERNEL BLIS_CSWAPV_KERNEL
|
|
#define bli_zzSWAPV_KERNEL BLIS_ZSWAPV_KERNEL
|
|
*/
|
|
|
|
|
|
// -- Maximum register blocksize search ----------------------------------------
|
|
|
|
//
|
|
// Find the largest register blocksize MR.
|
|
//
|
|
|
|
// s, d: No 4m value is compared for the real datatypes; the maximum MR is
// simply the regular default MR.
#define BLIS_MAX_DEFAULT_MR_S BLIS_DEFAULT_MR_S
|
|
#define BLIS_MAX_DEFAULT_MR_D BLIS_DEFAULT_MR_D
|
|
|
|
// NOTE: 4m and 3m register blocksizes are assumed to be equal. Thus,
|
|
// we only inspect the 4m values.
|
|
|
|
// c: Choose between the regular and 4m/3m blocksize.
|
|
// Start from the regular MR; the #if below replaces it with the 4m MR only
// when the 4m value is strictly larger.
#define BLIS_MAX_DEFAULT_MR_C BLIS_DEFAULT_MR_C
|
|
// At this point BLIS_MAX_DEFAULT_MR_C expands to BLIS_DEFAULT_MR_C, so this
// compares the 4m MR against the regular MR.
// NOTE(review): an identifier that is not defined as a macro evaluates to 0
// inside #if, so an undefined BLIS_DEFAULT_4M_MR_C would silently keep the
// regular value -- confirm it is always defined before this header.
#if BLIS_DEFAULT_4M_MR_C > BLIS_MAX_DEFAULT_MR_C
|
|
#undef BLIS_MAX_DEFAULT_MR_C
|
|
#define BLIS_MAX_DEFAULT_MR_C BLIS_DEFAULT_4M_MR_C
|
|
#endif
|
|
|
|
// z: Choose between the regular and 4m/3m blocksize.
|
|
// Same scheme as for c: default to the regular MR, then override with the
// 4m MR if it is strictly larger.
#define BLIS_MAX_DEFAULT_MR_Z BLIS_DEFAULT_MR_Z
|
|
#if BLIS_DEFAULT_4M_MR_Z > BLIS_MAX_DEFAULT_MR_Z
|
|
#undef BLIS_MAX_DEFAULT_MR_Z
|
|
#define BLIS_MAX_DEFAULT_MR_Z BLIS_DEFAULT_4M_MR_Z
|
|
#endif
|
|
|
|
//
|
|
// Find the largest register blocksize NR.
|
|
//
|
|
|
|
// s, d: No 4m value is compared for the real datatypes; the maximum NR is
// simply the regular default NR.
#define BLIS_MAX_DEFAULT_NR_S BLIS_DEFAULT_NR_S
|
|
#define BLIS_MAX_DEFAULT_NR_D BLIS_DEFAULT_NR_D
|
|
|
|
// NOTE: 4m and 3m register blocksizes are assumed to be equal. Thus,
|
|
// we only inspect the 4m values.
|
|
|
|
// c: Choose between the regular and 4m/3m blocksize.
|
|
// Start from the regular NR; the #if below replaces it with the 4m NR only
// when the 4m value is strictly larger.
#define BLIS_MAX_DEFAULT_NR_C BLIS_DEFAULT_NR_C
|
|
// At this point BLIS_MAX_DEFAULT_NR_C expands to BLIS_DEFAULT_NR_C, so this
// compares the 4m NR against the regular NR.
// NOTE(review): an identifier that is not defined as a macro evaluates to 0
// inside #if, so an undefined BLIS_DEFAULT_4M_NR_C would silently keep the
// regular value -- confirm it is always defined before this header.
#if BLIS_DEFAULT_4M_NR_C > BLIS_MAX_DEFAULT_NR_C
|
|
#undef BLIS_MAX_DEFAULT_NR_C
|
|
#define BLIS_MAX_DEFAULT_NR_C BLIS_DEFAULT_4M_NR_C
|
|
#endif
|
|
|
|
// z: Choose between the regular and 4m/3m blocksize.
|
|
// Same scheme as for c: default to the regular NR, then override with the
// 4m NR if it is strictly larger.
#define BLIS_MAX_DEFAULT_NR_Z BLIS_DEFAULT_NR_Z
|
|
#if BLIS_DEFAULT_4M_NR_Z > BLIS_MAX_DEFAULT_NR_Z
|
|
#undef BLIS_MAX_DEFAULT_NR_Z
|
|
#define BLIS_MAX_DEFAULT_NR_Z BLIS_DEFAULT_4M_NR_Z
|
|
#endif
|
|
|
|
|
|
// -- Abbreviated macros -------------------------------------------------------
|
|
|
|
// Here, we shorten the maximum blocksizes found above so that they can be
|
|
// derived via the PASTEMAC macro.
|
|
|
|
// Maximum MR blocksizes
|
|
|
|
// One lowercase alias per datatype character (s, d, c, z); each simply
// forwards to the matching BLIS_MAX_DEFAULT_MR_* macro.
#define bli_smaxmr BLIS_MAX_DEFAULT_MR_S
|
|
#define bli_dmaxmr BLIS_MAX_DEFAULT_MR_D
|
|
#define bli_cmaxmr BLIS_MAX_DEFAULT_MR_C
|
|
#define bli_zmaxmr BLIS_MAX_DEFAULT_MR_Z
|
|
|
|
// Maximum NR blocksizes
|
|
|
|
// One lowercase alias per datatype character (s, d, c, z); each simply
// forwards to the matching BLIS_MAX_DEFAULT_NR_* macro.
#define bli_smaxnr BLIS_MAX_DEFAULT_NR_S
|
|
#define bli_dmaxnr BLIS_MAX_DEFAULT_NR_D
|
|
#define bli_cmaxnr BLIS_MAX_DEFAULT_NR_C
|
|
#define bli_zmaxnr BLIS_MAX_DEFAULT_NR_Z
|
|
|
|
|
|
#endif
|
|
|