mirror of
https://github.com/amd/blis.git
synced 2026-04-20 07:38:53 +00:00
Added support for zen3 configuration
- User can now specify zen3 configuration,
currently it reuses block sizes and kernels from zen2.
- Auto configuration can detect and enable if zen3 config is needed
- Added support for amd64 bundle which contains all zen platforms
- Moved exiting amd bundle to amd64 legacy.
AMD-Internal: [CPUPL-500, CPUPL-1013]
Change-Id: I60b0b8abc6d2821c27ff0f5f6e032e889194b957
This commit is contained in:
committed by
Dipal M Zambare
parent
6896f927da
commit
9c7814da1c
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -32,15 +33,10 @@
|
||||
|
||||
*/
|
||||
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
#ifndef BLIS_FAMILY_AMD64_H
|
||||
#define BLIS_FAMILY_AMD64_H
|
||||
|
||||
// Place holder for bundle configuration.
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
|
||||
|
||||
//#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#
|
||||
#
|
||||
# BLIS
|
||||
# BLIS
|
||||
# An object-based framework for developing high-performance BLAS-like
|
||||
# libraries.
|
||||
#
|
||||
@@ -42,47 +42,8 @@ THIS_CONFIG := amd64
|
||||
# --- Determine the C compiler and related flags ---
|
||||
#
|
||||
|
||||
# NOTE: The build system will append these variables with various
|
||||
# general-purpose/configuration-agnostic flags in common.mk. You
|
||||
# may specify additional flags here as needed.
|
||||
CPPROCFLAGS :=
|
||||
CMISCFLAGS :=
|
||||
CPICFLAGS :=
|
||||
CWARNFLAGS :=
|
||||
|
||||
ifneq ($(DEBUG_TYPE),off)
|
||||
CDBGFLAGS := -g
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG_TYPE),noopt)
|
||||
COPTFLAGS := -O0
|
||||
else
|
||||
COPTFLAGS := -O3
|
||||
endif
|
||||
|
||||
# Flags specific to optimized kernels.
|
||||
CKOPTFLAGS := $(COPTFLAGS)
|
||||
ifeq ($(CC_VENDOR),gcc)
|
||||
CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2
|
||||
else
|
||||
ifeq ($(CC_VENDOR),clang)
|
||||
CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2
|
||||
else
|
||||
$(error gcc or clang are required for this configuration.)
|
||||
endif
|
||||
endif
|
||||
|
||||
# Flags specific to reference kernels.
|
||||
CROPTFLAGS := $(CKOPTFLAGS)
|
||||
ifeq ($(CC_VENDOR),gcc)
|
||||
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
|
||||
else
|
||||
ifeq ($(CC_VENDOR),clang)
|
||||
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
|
||||
else
|
||||
CRVECFLAGS := $(CKVECFLAGS)
|
||||
endif
|
||||
endif
|
||||
# These setting should come from makefiles for individial configuration
|
||||
# included in this bundle.
|
||||
|
||||
# Store all of the variables here to new variables containing the
|
||||
# configuration name.
|
||||
|
||||
42
config/amd64_legacy/bli_family_amd64.h
Normal file
42
config/amd64_legacy/bli_family_amd64.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_FAMILY_AMD64_LEG_H
|
||||
#define BLIS_FAMILY_AMD64_LEG_H
|
||||
|
||||
// Place holder for bundle configuration.
|
||||
|
||||
#endif
|
||||
|
||||
51
config/amd64_legacy/make_defs.mk
Normal file
51
config/amd64_legacy/make_defs.mk
Normal file
@@ -0,0 +1,51 @@
|
||||
#
|
||||
#
|
||||
# BLIS
|
||||
# An object-based framework for developing high-performance BLAS-like
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2014, The University of Texas at Austin
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# - Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
|
||||
|
||||
# Declare the name of the current configuration and add it to the
|
||||
# running list of configurations included by common.mk.
|
||||
THIS_CONFIG := amd64_legacy
|
||||
#CONFIGS_INCL += $(THIS_CONFIG)
|
||||
|
||||
#
|
||||
# --- Determine the C compiler and related flags ---
|
||||
#
|
||||
|
||||
# These setting should come from makefiles for individial configuration
|
||||
# included in this bundle.
|
||||
|
||||
# Store all of the variables here to new variables containing the
|
||||
# configuration name.
|
||||
$(eval $(call store-make-defs,$(THIS_CONFIG)))
|
||||
|
||||
267
config/zen3/bli_cntx_init_zen3.c
Normal file
267
config/zen3/bli_cntx_init_zen3.c
Normal file
@@ -0,0 +1,267 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_cntx_init_zen3( cntx_t* cntx )
|
||||
{
|
||||
blksz_t blkszs[ BLIS_NUM_BLKSZS ];
|
||||
blksz_t thresh[ BLIS_NUM_THRESH ];
|
||||
// Set default kernel blocksizes and functions.
|
||||
bli_cntx_init_zen3_ref( cntx );
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// Update the context with optimized native gemm micro-kernels and
|
||||
// their storage preferences.
|
||||
bli_cntx_set_l3_nat_ukrs
|
||||
(
|
||||
8,
|
||||
// gemm
|
||||
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, TRUE,
|
||||
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_6x8, TRUE,
|
||||
BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, TRUE,
|
||||
BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, TRUE,
|
||||
// gemmtrsm_l
|
||||
BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_haswell_asm_6x16, TRUE,
|
||||
BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_haswell_asm_6x8, TRUE,
|
||||
// gemmtrsm_u
|
||||
BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_haswell_asm_6x16, TRUE,
|
||||
BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_haswell_asm_6x8, TRUE,
|
||||
cntx
|
||||
);
|
||||
|
||||
// packm kernels
|
||||
bli_cntx_set_packm_kers
|
||||
(
|
||||
2,
|
||||
BLIS_PACKM_8XK_KER, BLIS_DOUBLE, bli_dpackm_8xk_gen_zen,
|
||||
BLIS_PACKM_6XK_KER, BLIS_DOUBLE, bli_dpackm_6xk_gen_zen,
|
||||
cntx
|
||||
);
|
||||
|
||||
// Update the context with optimized level-1f kernels.
|
||||
bli_cntx_set_l1f_kers
|
||||
(
|
||||
4,
|
||||
// axpyf
|
||||
BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_5,
|
||||
BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_5,
|
||||
// dotxf
|
||||
BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8,
|
||||
BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8,
|
||||
cntx
|
||||
);
|
||||
|
||||
// Update the context with optimized level-1v kernels.
|
||||
bli_cntx_set_l1v_kers
|
||||
(
|
||||
16,
|
||||
#if 1
|
||||
// amaxv
|
||||
BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int,
|
||||
BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int,
|
||||
#endif
|
||||
// axpyv
|
||||
|
||||
// axpyv
|
||||
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10,
|
||||
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10,
|
||||
|
||||
// dotv
|
||||
BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int10,
|
||||
BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int10,
|
||||
|
||||
// dotxv
|
||||
BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int,
|
||||
BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int,
|
||||
|
||||
// scalv
|
||||
BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10,
|
||||
BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10,
|
||||
|
||||
//swap
|
||||
BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_zen_int8,
|
||||
BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_zen_int8,
|
||||
|
||||
//copy
|
||||
BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_zen_int,
|
||||
BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_zen_int,
|
||||
|
||||
//set
|
||||
BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_zen_int,
|
||||
BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_zen_int,
|
||||
cntx
|
||||
);
|
||||
|
||||
// Initialize level-3 blocksize objects with architecture-specific values.
|
||||
// s d c z
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 );
|
||||
#if AOCL_BLIS_MULTIINSTANCE
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 );
|
||||
#else
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );
|
||||
#endif
|
||||
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
|
||||
|
||||
// Update the context with the current architecture's register and cache
|
||||
// blocksizes (and multiples) for native execution.
|
||||
bli_cntx_set_blkszs
|
||||
(
|
||||
BLIS_NAT, 7,
|
||||
// level-3
|
||||
BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR,
|
||||
BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR,
|
||||
BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR,
|
||||
BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR,
|
||||
BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR,
|
||||
// level-1f
|
||||
BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF,
|
||||
BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF,
|
||||
cntx
|
||||
);
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
//Initialize TRSM blocksize objects with architecture-specific values.
|
||||
//Using different cache block sizes for TRSM instead of common level-3 block sizes.
|
||||
//Tuning is done for double-precision only.
|
||||
// s d c z
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 492, 256, 256 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 1600, 4080, 4080 );
|
||||
|
||||
// Update the context with the current architecture's register and cache
|
||||
// blocksizes for level-3 TRSM problems.
|
||||
bli_cntx_set_trsm_blkszs
|
||||
(
|
||||
5,
|
||||
BLIS_NC, &blkszs[ BLIS_NC ],
|
||||
BLIS_KC, &blkszs[ BLIS_KC ],
|
||||
BLIS_MC, &blkszs[ BLIS_MC ],
|
||||
BLIS_NR, &blkszs[ BLIS_NR ],
|
||||
BLIS_MR, &blkszs[ BLIS_MR ],
|
||||
cntx
|
||||
);
|
||||
|
||||
// Initialize sup thresholds with architecture-appropriate values. s d c z
|
||||
bli_blksz_init_easy( &thresh[ BLIS_MT ], 512, 256, 380, 110 );
|
||||
bli_blksz_init_easy( &thresh[ BLIS_NT ], 200, 256, 256, 128 );
|
||||
bli_blksz_init_easy( &thresh[ BLIS_KT ], 240, 220, 220, 110 );
|
||||
|
||||
// Initialize the context with the sup thresholds.
|
||||
bli_cntx_set_l3_sup_thresh
|
||||
(
|
||||
3,
|
||||
BLIS_MT, &thresh[ BLIS_MT ],
|
||||
BLIS_NT, &thresh[ BLIS_NT ],
|
||||
BLIS_KT, &thresh[ BLIS_KT ],
|
||||
cntx
|
||||
);
|
||||
|
||||
// Initialize the context with the sup handlers.
|
||||
bli_cntx_set_l3_sup_handlers
|
||||
(
|
||||
2,
|
||||
BLIS_GEMM, bli_gemmsup_ref,
|
||||
BLIS_GEMMT, bli_gemmtsup_ref,
|
||||
cntx
|
||||
);
|
||||
|
||||
// Update the context with optimized small/unpacked gemm kernels.
|
||||
bli_cntx_set_l3_sup_kers
|
||||
(
|
||||
28,
|
||||
//BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_r_haswell_ref,
|
||||
BLIS_RRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE,
|
||||
BLIS_RRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, TRUE,
|
||||
BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE,
|
||||
BLIS_RCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE,
|
||||
BLIS_CRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE,
|
||||
BLIS_CRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, TRUE,
|
||||
BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE,
|
||||
BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE,
|
||||
BLIS_RRR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, TRUE,
|
||||
BLIS_RRC, BLIS_FLOAT, bli_sgemmsup_rd_zen_asm_6x16m, TRUE,
|
||||
BLIS_RCR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, TRUE,
|
||||
BLIS_RCC, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, TRUE,
|
||||
BLIS_CRR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, TRUE,
|
||||
BLIS_CRC, BLIS_FLOAT, bli_sgemmsup_rd_zen_asm_6x16n, TRUE,
|
||||
BLIS_CCR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, TRUE,
|
||||
BLIS_CCC, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, TRUE,
|
||||
BLIS_RRR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, TRUE,
|
||||
BLIS_RCR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, TRUE,
|
||||
BLIS_CRR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, TRUE,
|
||||
BLIS_RCC, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE,
|
||||
BLIS_CCR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE,
|
||||
BLIS_CCC, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE,
|
||||
BLIS_RRR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, TRUE,
|
||||
BLIS_RCR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, TRUE,
|
||||
BLIS_CRR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, TRUE,
|
||||
BLIS_RCC, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, TRUE,
|
||||
BLIS_CCR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, TRUE,
|
||||
BLIS_CCC, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, TRUE,
|
||||
cntx
|
||||
);
|
||||
|
||||
// Initialize level-3 sup blocksize objects with architecture-specific
|
||||
// values.
|
||||
// s d c z
|
||||
bli_blksz_init ( &blkszs[ BLIS_MR ], 6, 6, 3, 3,
|
||||
9, 9, 3, 3 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 72, 36 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 512, 256, 128, 64 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 2040, 1020 );
|
||||
|
||||
// Update the context with the current architecture's register and cache
|
||||
// blocksizes for small/unpacked level-3 problems.
|
||||
bli_cntx_set_l3_sup_blkszs
|
||||
(
|
||||
5,
|
||||
BLIS_NC, &blkszs[ BLIS_NC ],
|
||||
BLIS_KC, &blkszs[ BLIS_KC ],
|
||||
BLIS_MC, &blkszs[ BLIS_MC ],
|
||||
BLIS_NR, &blkszs[ BLIS_NR ],
|
||||
BLIS_MR, &blkszs[ BLIS_MR ],
|
||||
cntx
|
||||
);
|
||||
}
|
||||
92
config/zen3/bli_family_zen3.h
Normal file
92
config/zen3/bli_family_zen3.h
Normal file
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLI_FAMILY_ZEN3_
|
||||
#define BLI_FAMILY_ZEN3_
|
||||
|
||||
// By default, it is effective to parallelize the outer loops.
|
||||
// Setting these macros to 1 will force JR and IR inner loops
|
||||
// to be not paralleized.
|
||||
//
|
||||
// will be enabled later if required after block size tuning.
|
||||
|
||||
//#define BLIS_THREAD_MAX_IR 1
|
||||
//#define BLIS_THREAD_MAX_JR 1
|
||||
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
|
||||
// This will select the threshold below which small matrix code will be called.
|
||||
#define BLIS_SMALL_MATRIX_THRES 700
|
||||
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
|
||||
#define BLIS_SMALL_K_RECT_MATRIX_THRES 128
|
||||
|
||||
#define BLIS_SMALL_MATRIX_THRES_TRSM 32768 //128(128+128) => m*(m+n)
|
||||
#define BLIS_SMALL_MATRIX_A_THRES_TRSM 128
|
||||
#define BLIS_SMALL_MATRIX_A_THRES_M_SYRK 96
|
||||
#define BLIS_SMALL_MATRIX_A_THRES_N_SYRK 128
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
#define BLIS_SMALL_MATRIX_THRES_ROME 400
|
||||
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 80
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M 40
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M 1000
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N 10
|
||||
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M 5
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N 130
|
||||
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME 120
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M 10
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N 1200
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M 30
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N 280
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N 100
|
||||
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME 110
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N 30
|
||||
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME 120
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME_COL_PANEL_N 50
|
||||
|
||||
// When running HPL with pure MPI without DGEMM threading (Single-threaded
|
||||
// BLIS), defining this macro as 1 yields better performance.
|
||||
#define AOCL_BLIS_MULTIINSTANCE 0
|
||||
|
||||
#endif
|
||||
121
config/zen3/make_defs.mk
Normal file
121
config/zen3/make_defs.mk
Normal file
@@ -0,0 +1,121 @@
|
||||
#
|
||||
#
|
||||
# BLIS
|
||||
# An object-based framework for developing high-performance BLAS-like
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2014, The University of Texas at Austin
|
||||
# Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# - Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
|
||||
# FLAGS that are specific to the 'zen3' architecture are added here.
|
||||
# FLAGS that are common for all the AMD architectures are present in
|
||||
# config/zen/amd_config.mk.
|
||||
|
||||
# Declare the name of the current configuration and add it to the
|
||||
# running list of configurations included by common.mk.
|
||||
THIS_CONFIG := zen3
|
||||
#CONFIGS_INCL += $(THIS_CONFIG)
|
||||
|
||||
#
|
||||
# --- Determine the C compiler and related flags ---
|
||||
#
|
||||
|
||||
# NOTE: The build system will append these variables with various
|
||||
# general-purpose/configuration-agnostic flags in common.mk. You
|
||||
# may specify additional flags here as needed.
|
||||
CPPROCFLAGS :=
|
||||
CMISCFLAGS :=
|
||||
CPICFLAGS :=
|
||||
CWARNFLAGS :=
|
||||
|
||||
ifneq ($(DEBUG_TYPE),off)
|
||||
CDBGFLAGS := -g
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG_TYPE),noopt)
|
||||
COPTFLAGS := -O0
|
||||
else
|
||||
#frame pointers are needed to execution tracing
|
||||
ifeq ($(ETRACE_ENABLE),1)
|
||||
COPTFLAGS := -O3
|
||||
else
|
||||
COPTFLAGS := -O3 -fomit-frame-pointer
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
#
|
||||
# --- Enable ETRACE across the library if enabled ETRACE_ENABLE=[0,1] -----------------------
|
||||
#
|
||||
|
||||
ifeq ($(ETRACE_ENABLE),1)
|
||||
CDBGFLAGS += -pg -finstrument-functions -DAOCL_DTL_AUTO_TRACE_ENABLE
|
||||
LDFLAGS += -ldl
|
||||
endif
|
||||
|
||||
# Flags specific to optimized kernels.
|
||||
CKOPTFLAGS := $(COPTFLAGS)
|
||||
ifeq ($(CC_VENDOR),gcc)
|
||||
GCC_VERSION := $(strip $(shell gcc -dumpversion | cut -d. -f1))
|
||||
#gcc or clang version must be atleast 4.0
|
||||
# gcc 9.0 or later:
|
||||
ifeq ($(shell test $(GCC_VERSION) -ge 9; echo $$?),0)
|
||||
CKVECFLAGS += -march=znver2
|
||||
else
|
||||
# If gcc is older than 9.1.0 but at least 6.1.0, then we can use -march=znver1
|
||||
# as the fallback option.
|
||||
CRVECFLAGS += -march=znver1 -mno-avx256-split-unaligned-store
|
||||
CKVECFLAGS += -march=znver1 -mno-avx256-split-unaligned-store
|
||||
endif
|
||||
else
|
||||
ifeq ($(CC_VENDOR),clang)
|
||||
ifeq ($(strip $(shell clang -v |&head -1 |grep -c 'AOCC.LLVM.2.0.0')),1)
|
||||
CKVECFLAGS += -march=znver2
|
||||
else
|
||||
#if compiling with clang
|
||||
VENDOR_STRING := $(strip $(shell ${CC_VENDOR} --version | egrep -o '[0-9]+\.[0-9]+\.?[0-9]*'))
|
||||
CC_MAJOR := $(shell (echo ${VENDOR_STRING} | cut -d. -f1))
|
||||
#clang 9.0 or later:
|
||||
ifeq ($(shell test $(CC_MAJOR) -ge 9; echo $$?),0)
|
||||
CKVECFLAGS += -march=znver2
|
||||
else
|
||||
CKVECFLAGS += -march=znver1
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Flags specific to reference kernels.
|
||||
CROPTFLAGS := $(CKOPTFLAGS)
|
||||
CRVECFLAGS := $(CKVECFLAGS)
|
||||
|
||||
# Store all of the variables here to new variables containing the
|
||||
# configuration name.
|
||||
$(eval $(call store-make-defs,$(THIS_CONFIG)))
|
||||
|
||||
@@ -8,9 +8,10 @@
|
||||
#
|
||||
|
||||
# Processor families.
|
||||
x86_64: intel64 amd64
|
||||
intel64: skx knl haswell sandybridge penryn generic
|
||||
amd64: excavator steamroller piledriver bulldozer generic
|
||||
x86_64: intel64 amd64 and64_legacy
|
||||
intel64: skx knl haswell sandybridge penryn generic
|
||||
amd64_legacy: excavator steamroller piledriver bulldozer generic
|
||||
amd64: zen3 zen2 zen generic
|
||||
# NOTE: ARM families will remain disabled until runtime hardware detection
|
||||
# logic is added to BLIS.
|
||||
#arm64: cortexa57 generic
|
||||
@@ -24,6 +25,7 @@ sandybridge: sandybridge
|
||||
penryn: penryn
|
||||
|
||||
# AMD architectures.
|
||||
zen3: zen3/zen3/zen2/zen/haswell
|
||||
zen2: zen2/zen2/zen/haswell
|
||||
zen: zen/zen/haswell
|
||||
excavator: excavator/piledriver
|
||||
|
||||
2
configure
vendored
2
configure
vendored
@@ -3050,7 +3050,7 @@ main()
|
||||
#create a AOCL specific #define
|
||||
#This macro is enabled only for zen family configurations.
|
||||
#This enables us to use different cache block sizes for TRSM instead of common level-3 block sizes.
|
||||
uconf=$(echo ${config_name} | grep -c 'zen' | cut -d. -f1)
|
||||
uconf=$(echo ${config_name} | grep -c 'zen\|amd64' | cut -d. -f1)
|
||||
if [[ $uconf == 1 ]]; then
|
||||
enable_aocl_zen='yes'
|
||||
enable_aocl_zen_01=1
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018-2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018-2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -110,6 +110,9 @@ void bli_arch_set_id( void )
|
||||
#endif
|
||||
|
||||
// AMD microarchitectures.
|
||||
#ifdef BLIS_FAMILY_ZEN3
|
||||
id = BLIS_ARCH_ZEN3;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_ZEN2
|
||||
id = BLIS_ARCH_ZEN2;
|
||||
#endif
|
||||
@@ -185,6 +188,7 @@ static char* config_name[ BLIS_NUM_ARCHS ] =
|
||||
"sandybridge",
|
||||
"penryn",
|
||||
|
||||
"zen3",
|
||||
"zen2",
|
||||
"zen",
|
||||
"excavator",
|
||||
@@ -201,7 +205,7 @@ static char* config_name[ BLIS_NUM_ARCHS ] =
|
||||
"power9",
|
||||
"power7",
|
||||
"bgq",
|
||||
|
||||
|
||||
"generic"
|
||||
};
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018-2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018-2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2019, Dave Love, University of Manchester
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -51,6 +51,7 @@
|
||||
#include "bli_system.h"
|
||||
#include "bli_type_defs.h"
|
||||
#include "bli_cpuid.h"
|
||||
#include "bli_arch.h"
|
||||
#else
|
||||
#include "blis.h"
|
||||
#include "bli_arch.h"
|
||||
@@ -112,6 +113,10 @@ arch_t bli_cpuid_query_id( void )
|
||||
|
||||
// Check for each AMD configuration that is enabled, check for that
|
||||
// microarchitecture. We check from most recent to most dated.
|
||||
#ifdef BLIS_CONFIG_ZEN3
|
||||
if ( bli_cpuid_is_zen3( family, model, features ) )
|
||||
return BLIS_ARCH_ZEN3;
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
if ( bli_cpuid_is_zen2( family, model, features ) )
|
||||
return BLIS_ARCH_ZEN2;
|
||||
@@ -259,6 +264,34 @@ bool_t bli_cpuid_is_penryn
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
bool_t bli_cpuid_is_zen3
|
||||
(
|
||||
uint32_t family,
|
||||
uint32_t model,
|
||||
uint32_t features
|
||||
)
|
||||
{
|
||||
// Check for expected CPU features.
|
||||
const uint32_t expected = FEATURE_AVX |
|
||||
FEATURE_FMA3 |
|
||||
FEATURE_AVX2;
|
||||
|
||||
if ( !bli_cpuid_has_features( features, expected ) ) return FALSE;
|
||||
|
||||
// For zen3 the family id is 0x19
|
||||
if ( family != 0x19 ) return FALSE;
|
||||
|
||||
// Finally, check for specific models:
|
||||
// - 0x00-0xff (THIS NEEDS UPDATING)
|
||||
const bool_t is_arch
|
||||
=
|
||||
( 0x00 <= model && model <= 0xff );
|
||||
|
||||
if ( !is_arch ) return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
bool_t bli_cpuid_is_zen2
|
||||
(
|
||||
uint32_t family,
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018-2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018-2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -61,6 +61,7 @@ bool_t bli_cpuid_is_sandybridge( uint32_t family, uint32_t model, uint32_t fe
|
||||
bool_t bli_cpuid_is_penryn( uint32_t family, uint32_t model, uint32_t features );
|
||||
|
||||
// AMD
|
||||
BLIS_EXPORT_BLIS bool_t bli_cpuid_is_zen3( uint32_t family, uint32_t model, uint32_t features );
|
||||
BLIS_EXPORT_BLIS bool_t bli_cpuid_is_zen2( uint32_t family, uint32_t model, uint32_t features );
|
||||
BLIS_EXPORT_BLIS bool_t bli_cpuid_is_zen( uint32_t family, uint32_t model, uint32_t features );
|
||||
BLIS_EXPORT_BLIS bool_t bli_cpuid_is_excavator( uint32_t family, uint32_t model, uint32_t features );
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018-2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018-2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -97,6 +97,11 @@ void bli_gks_init( void )
|
||||
#endif
|
||||
|
||||
// AMD architectures
|
||||
#ifdef BLIS_CONFIG_ZEN3
|
||||
bli_gks_register_cntx( BLIS_ARCH_ZEN3, bli_cntx_init_zen3,
|
||||
bli_cntx_init_zen3_ref,
|
||||
bli_cntx_init_zen3_ind );
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
bli_gks_register_cntx( BLIS_ARCH_ZEN2, bli_cntx_init_zen2,
|
||||
bli_cntx_init_zen2_ref,
|
||||
|
||||
@@ -62,6 +62,9 @@ CNTX_INIT_PROTS( penryn )
|
||||
#endif
|
||||
|
||||
// -- AMD64 architectures --
|
||||
#ifdef BLIS_CONFIG_ZEN3
|
||||
CNTX_INIT_PROTS( zen3 )
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
CNTX_INIT_PROTS( zen2 )
|
||||
#endif
|
||||
@@ -159,6 +162,9 @@ CNTX_INIT_PROTS( generic )
|
||||
|
||||
// -- AMD64 architectures --
|
||||
|
||||
#ifdef BLIS_FAMILY_ZEN3
|
||||
#include "bli_family_zen3.h"
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_ZEN2
|
||||
#include "bli_family_zen2.h"
|
||||
#endif
|
||||
|
||||
@@ -992,6 +992,7 @@ typedef enum
|
||||
BLIS_ARCH_PENRYN,
|
||||
|
||||
// AMD
|
||||
BLIS_ARCH_ZEN3,
|
||||
BLIS_ARCH_ZEN2,
|
||||
BLIS_ARCH_ZEN,
|
||||
BLIS_ARCH_EXCAVATOR,
|
||||
@@ -1016,9 +1017,7 @@ typedef enum
|
||||
|
||||
} arch_t;
|
||||
|
||||
// NOTE: This value must be updated to reflect the number of enum values
|
||||
// listed above for arch_t!
|
||||
#define BLIS_NUM_ARCHS (BLIS_ARCH_GENERIC+1)
|
||||
#define BLIS_NUM_ARCHS 22
|
||||
|
||||
|
||||
//
|
||||
|
||||
4
kernels/zen3/.gitignore
vendored
Normal file
4
kernels/zen3/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
# Ignore everything in this directory
|
||||
*
|
||||
# Except this file
|
||||
!.gitignore
|
||||
Reference in New Issue
Block a user