Merge branch 'amd'

This commit is contained in:
Field G. Van Zee
2019-10-28 17:58:31 -05:00
297 changed files with 30324 additions and 380 deletions

View File

@@ -15,7 +15,7 @@ copyright info. All parties provide their portions of the code under the
Copyright (C) 2018, The University of Texas at Austin
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -92,6 +92,7 @@ CC := @CC@
# Important C compiler ranges.
GCC_OT_4_9_0 := @gcc_older_than_4_9_0@
GCC_OT_6_1_0 := @gcc_older_than_6_1_0@
GCC_OT_9_1_0 := @gcc_older_than_9_1_0@
# The C++ compiler. NOTE: A C++ compiler is typically not needed.
CXX := @CXX@

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2015, The University of Texas at Austin
Copyright (C) 2017, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
# libraries.
#
# Copyright (C) 2018, The University of Texas at Austin
# Copyright (C) 2018, Advanced Micro Devices, Inc.
# Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are

83
config/zen/amd_config.mk Normal file
View File

@@ -0,0 +1,83 @@
#
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2019, Advanced Micro Devices, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name(s) of the copyright holder(s) nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# Flags shared by every AMD architecture configuration are collected in this
# file.
# NOTE: common.mk later appends various general-purpose,
# configuration-agnostic flags to these variables, so they only need to
# hold what is specific to the AMD configurations. Add further flags here
# as needed.
CWARNFLAGS :=
CPICFLAGS :=
CMISCFLAGS :=
CPPROCFLAGS :=
# Debug symbols are requested for every debug type except 'off'.
ifeq ($(DEBUG_TYPE),off)
# Nothing to add: CDBGFLAGS is deliberately left untouched in this case.
else
CDBGFLAGS := -g
endif
# Optimize fully unless the 'noopt' debug type was selected.
ifneq ($(DEBUG_TYPE),noopt)
COPTFLAGS := -O3 -fomit-frame-pointer
else
COPTFLAGS := -O0
endif
# Flags specific to optimized kernels.
# The optimization level is inherited from the general flags; the vector
# flags depend on the compiler vendor. Only gcc and clang are accepted; any
# other vendor aborts the build with an error.
CKOPTFLAGS := $(COPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma
else
ifeq ($(CC_VENDOR),clang)
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma
# When compiling with AOCC, add these flags to the default flags set above.
# AOCC is recognized from the first line of the compiler's '-v' output.
# That output is merged into stdout with the POSIX '2>&1' redirection rather
# than the bash-only '|&' operator: make runs $(shell ...) through /bin/sh by
# default, and shells such as dash reject '|&' as a syntax error, which made
# this check silently fail. $(CC) is queried instead of a hard-coded 'clang'
# so that the compiler actually used for the build is the one inspected.
ifeq ($(strip $(shell $(CC) -v 2>&1 | head -1 | grep -c 'AOCC.LLVM.2.0.0')),1)
CKVECFLAGS += -mllvm -disable-licm-vrp
endif
else
$(error gcc or clang are required for this configuration.)
endif
endif
# Flags specific to reference kernels.
# The optimization level mirrors the one used for optimized kernels.
CROPTFLAGS := $(CKOPTFLAGS)
# Start from the optimized-kernel vector flags. For clang and gcc, also allow
# unsafe math optimizations and fast floating-point contraction; any other
# vendor gets the optimized-kernel vector flags unchanged.
ifeq ($(CC_VENDOR),clang)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else ifeq ($(CC_VENDOR),gcc)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
CRVECFLAGS := $(CKVECFLAGS)
endif

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -115,15 +115,37 @@ void bli_cntx_init_zen( cntx_t* cntx )
// s d c z
bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 );
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 );
/*
Multi-instance performance improvement of DGEMM when bound to a CCX
In Multi instance each thread runs a sequential DGEMM.
a) If BLIS is run in a multi-instance mode with
CPU freq 2.6/2.2 GHz
DDR4 clock frequency 2400 MHz
mc = 240, kc = 512, and nc = 2040
has better performance on EPYC server, over the default block sizes.
b) If BLIS is run in Single Instance mode
mc = 510, kc = 1024 and nc = 4080
*/
#ifdef BLIS_ENABLE_ZEN_BLOCK_SIZES
// Zen optimized level 3 cache block sizes
#if BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 1020, 510, 510, 255 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 1024, 1024, 1024, 1024 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 );
#else
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 2040, 1528 );
#endif
#else
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
#endif
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 );
#endif
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -43,13 +43,27 @@
#define BLIS_THREAD_MAX_JR 1
#define BLIS_ENABLE_ZEN_BLOCK_SIZES
//#define BLIS_ENABLE_SMALL_MATRIX
#define BLIS_ENABLE_SMALL_MATRIX
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
// This will select the threshold below which small matrix code will be called.
#define BLIS_SMALL_MATRIX_THRES 700
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
#define BLIS_SMALL_K_RECT_MATRIX_THRES 128
#define BLIS_SMALL_MATRIX_THRES_TRSM 32768 //128(128+128) => m*(m+n)
#define BLIS_SMALL_MATRIX_A_THRES_TRSM 128
#define BLIS_SMALL_MATRIX_A_THRES_M_SYRK 96
#define BLIS_SMALL_MATRIX_A_THRES_N_SYRK 128
//This macro will enable BLIS DGEMM to choose block sizes for a single instance mode
#define BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES 0
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES 250
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES 90
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO 22
//#endif

View File

@@ -5,6 +5,7 @@
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
# Copyright (C) 2019, Advanced Micro Devices, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
@@ -32,6 +33,9 @@
#
#
# FLAGS that are specific to the 'zen' architecture are added here.
# FLAGS that are common for all the AMD architectures are present in
# amd_config.mk.
# Declare the name of the current configuration and add it to the
# running list of configurations included by common.mk.
@@ -42,56 +46,38 @@ THIS_CONFIG := zen
# --- Determine the C compiler and related flags ---
#
# NOTE: The build system will append these variables with various
# general-purpose/configuration-agnostic flags in common.mk. You
# may specify additional flags here as needed.
CPPROCFLAGS :=
CMISCFLAGS :=
CPICFLAGS :=
CWARNFLAGS :=
# Include the file containing common flags for all AMD architectures.
AMD_CONFIG_FILE := amd_config.mk
AMD_CONFIG_PATH := $(BASE_SHARE_PATH)/config/zen
-include $(AMD_CONFIG_PATH)/$(AMD_CONFIG_FILE)
ifneq ($(DEBUG_TYPE),off)
CDBGFLAGS := -g
endif
ifeq ($(DEBUG_TYPE),noopt)
COPTFLAGS := -O0
else
COPTFLAGS := -O3
endif
# Flags specific to optimized kernels.
CKOPTFLAGS := $(COPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma -march=znver1
ifeq ($(GCC_OT_6_1_0),yes)
# If gcc is older than 6.1.0, we must use -march=bdver4 and then remove the
# Bulldozer instruction sets that were omitted from Zen.
# Additionally, if gcc is 4.9 (clang 3.5?) or newer, we may want to add
# Zen-specific instructions back into the mix:
# -mclzero -madx -mrdseed -mmwaitx -msha -mxsavec -mxsaves -mclflushopt -mpopcnt
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
ifeq ($(GCC_OT_6_1_0),yes)
CRVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
CKVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
else
# If gcc is at least 6.1.0, then we can specify the microarchitecture using
# the preferred option.
CRVECFLAGS += -march=znver1
CKVECFLAGS += -march=znver1
endif
else
ifeq ($(CC_VENDOR),clang)
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
# I couldn't find which versions of clang added support for -march=znver1,
# so we don't even bother attempting the differentiation that appears in the
# gcc branch above.
CRVECFLAGS += -march=znver1
CKVECFLAGS += -march=znver1
else
$(error gcc or clang are required for this configuration.)
endif
endif
# Flags specific to reference kernels.
CROPTFLAGS := $(CKOPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
ifeq ($(CC_VENDOR),clang)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
CRVECFLAGS := $(CKVECFLAGS)
endif
endif
# Store all of the variables here to new variables containing the
# configuration name.
$(eval $(call store-make-defs,$(THIS_CONFIG)))

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2017, Advanced Micro Devices, Inc.
Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc.
Copyright (C) 2018, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without

View File

@@ -0,0 +1,139 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
void bli_cntx_init_zen2( cntx_t* cntx )
{
	// Local table of blocksize objects, one slot per blocksize id.
	blksz_t bsz[ BLIS_NUM_BLKSZS ];

	// Begin with the default (reference) kernel blocksizes and functions;
	// selected entries are overridden below.
	bli_cntx_init_zen2_ref( cntx );

	// -------------------------------------------------------------------------

	// Install the optimized native level-3 micro-kernels together with
	// their storage preferences. The leading integer matches the number
	// of (kernel id, datatype, function, preference) entries that follow.
	bli_cntx_set_l3_nat_ukrs
	(
	  8,

	  // gemm
	  BLIS_GEMM_UKR,       BLIS_FLOAT,    bli_sgemm_haswell_asm_6x16,       TRUE,
	  BLIS_GEMM_UKR,       BLIS_DOUBLE,   bli_dgemm_haswell_asm_6x8,        TRUE,
	  BLIS_GEMM_UKR,       BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8,        TRUE,
	  BLIS_GEMM_UKR,       BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4,        TRUE,

	  // gemmtrsm_l
	  BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT,    bli_sgemmtrsm_l_haswell_asm_6x16, TRUE,
	  BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE,   bli_dgemmtrsm_l_haswell_asm_6x8,  TRUE,

	  // gemmtrsm_u
	  BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT,    bli_sgemmtrsm_u_haswell_asm_6x16, TRUE,
	  BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE,   bli_dgemmtrsm_u_haswell_asm_6x8,  TRUE,

	  cntx
	);

	// Install the optimized level-1f kernels (real single and double
	// precision only).
	bli_cntx_set_l1f_kers
	(
	  4,

	  // axpyf
	  BLIS_AXPYF_KER, BLIS_FLOAT,  bli_saxpyf_zen_int_8,
	  BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8,

	  // dotxf
	  BLIS_DOTXF_KER, BLIS_FLOAT,  bli_sdotxf_zen_int_8,
	  BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8,

	  cntx
	);

	// Install the optimized level-1v kernels (real single and double
	// precision only).
	bli_cntx_set_l1v_kers
	(
	  10,

	  // amaxv
	  BLIS_AMAXV_KER, BLIS_FLOAT,  bli_samaxv_zen_int,
	  BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int,

	  // axpyv
	  BLIS_AXPYV_KER, BLIS_FLOAT,  bli_saxpyv_zen_int10,
	  BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10,

	  // dotv
	  BLIS_DOTV_KER,  BLIS_FLOAT,  bli_sdotv_zen_int,
	  BLIS_DOTV_KER,  BLIS_DOUBLE, bli_ddotv_zen_int,

	  // dotxv
	  BLIS_DOTXV_KER, BLIS_FLOAT,  bli_sdotxv_zen_int,
	  BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int,

	  // scalv
	  BLIS_SCALV_KER, BLIS_FLOAT,  bli_sscalv_zen_int10,
	  BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10,

	  cntx
	);

	// Fill in the level-3 blocksize objects with architecture-specific
	// values. Columns are ordered by datatype:
	//                                        s      d      c      z
	bli_blksz_init_easy( &bsz[ BLIS_MR ],     6,     6,     3,     3 );
	bli_blksz_init_easy( &bsz[ BLIS_NR ],    16,     8,     8,     4 );

	// The cache blocksizes come in two variants that differ only in the
	// double-precision column; AOCL_BLIS_MULTIINSTANCE picks between them.
#if AOCL_BLIS_MULTIINSTANCE
	bli_blksz_init_easy( &bsz[ BLIS_MC ],   144,   240,   144,    72 );
	bli_blksz_init_easy( &bsz[ BLIS_KC ],   256,   512,   256,   256 );
	bli_blksz_init_easy( &bsz[ BLIS_NC ],  4080,  2040,  4080,  4080 );
#else
	bli_blksz_init_easy( &bsz[ BLIS_MC ],   144,    72,   144,    72 );
	bli_blksz_init_easy( &bsz[ BLIS_KC ],   256,   256,   256,   256 );
	bli_blksz_init_easy( &bsz[ BLIS_NC ],  4080,  4080,  4080,  4080 );
#endif

	// Level-1f fusing factors; -1 is passed for the complex datatypes.
	bli_blksz_init_easy( &bsz[ BLIS_AF ],     8,     8,    -1,    -1 );
	bli_blksz_init_easy( &bsz[ BLIS_DF ],     8,     8,    -1,    -1 );

	// Store this architecture's register and cache blocksizes (and their
	// multiples) in the context for native execution. Each entry names a
	// blocksize id, the object holding its values, and the id of the
	// blocksize it must be a multiple of.
	bli_cntx_set_blkszs
	(
	  BLIS_NAT, 7,

	  // level-3
	  BLIS_NC, &bsz[ BLIS_NC ], BLIS_NR,
	  BLIS_KC, &bsz[ BLIS_KC ], BLIS_KR,
	  BLIS_MC, &bsz[ BLIS_MC ], BLIS_MR,
	  BLIS_NR, &bsz[ BLIS_NR ], BLIS_NR,
	  BLIS_MR, &bsz[ BLIS_MR ], BLIS_MR,

	  // level-1f
	  BLIS_AF, &bsz[ BLIS_AF ], BLIS_AF,
	  BLIS_DF, &bsz[ BLIS_DF ], BLIS_DF,

	  cntx
	);
}

View File

@@ -0,0 +1,74 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019, Advanced Micro Devices, Inc
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Configuration macros for the zen2 sub-configuration. The include guard
// ensures the definitions below are only processed once.
#ifndef BLI_FAMILY_ZEN2_
#define BLI_FAMILY_ZEN2_
// By default, it is effective to parallelize the outer loops.
// Setting these macros to 1 will force the JR and IR inner loops
// to not be parallelized.
#define BLIS_THREAD_MAX_IR 1
#define BLIS_THREAD_MAX_JR 1
// Enable the small-matrix code paths (general and TRSM-specific).
// NOTE(review): within the visible sources BLIS_ENABLE_SMALL_MATRIX guards the
// bli_gemm_small() call in bli_gemm_front(); confirm where the TRSM variant
// is consumed.
#define BLIS_ENABLE_SMALL_MATRIX
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
// This will select the threshold below which small matrix code will be called.
#define BLIS_SMALL_MATRIX_THRES 700
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
#define BLIS_SMALL_K_RECT_MATRIX_THRES 128
#define BLIS_SMALL_MATRIX_THRES_TRSM 32768 // 128*(128+128) => m*(m+n)
#define BLIS_SMALL_MATRIX_A_THRES_TRSM 128
#define BLIS_SMALL_MATRIX_A_THRES_M_SYRK 96
#define BLIS_SMALL_MATRIX_A_THRES_N_SYRK 128
// Settings carrying a _ROME suffix.
// NOTE(review): presumably "Rome" denotes the Zen2-based server parts and
// these thresholds override the generic ones above on that hardware; the
// code that reads them is not visible here -- confirm.
#define BLIS_ENABLE_SMALL_MATRIX_ROME
#define BLIS_SMALL_MATRIX_THRES_ROME 400
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME 120
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 60
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO 22
// When running HPL with pure MPI without DGEMM threading (Single-threaded
// BLIS), defining this macro as 1 yields better performance.
// It is tested with #if in bli_cntx_init_zen2() to choose between two sets
// of MC/KC/NC cache blocksizes, so it must always expand to an integer.
#define AOCL_BLIS_MULTIINSTANCE 0
#endif

88
config/zen2/make_defs.mk Normal file
View File

@@ -0,0 +1,88 @@
#
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
# Copyright (C) 2019, Advanced Micro Devices, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name(s) of the copyright holder(s) nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# FLAGS that are specific to the 'zen2' architecture are added here.
# FLAGS that are common for all the AMD architectures are present in
# config/zen/amd_config.mk.
# Declare the name of the current configuration.
# NOTE(review): the line that would add this name to the running list of
# configurations included by common.mk (CONFIGS_INCL) is commented out
# below; confirm that this is intentional.
THIS_CONFIG := zen2
#CONFIGS_INCL += $(THIS_CONFIG)
#
# --- Determine the C compiler and related flags ---
#
# Include file containing common flags for all AMD architectures.
# The file lives in the 'zen' configuration directory and is shared with this
# configuration. The leading '-' makes the include non-fatal: if the file
# cannot be found, make continues without any diagnostic, and the '+='
# assignments that follow then append to whatever values (possibly none) the
# flag variables already hold.
AMD_CONFIG_FILE := amd_config.mk
AMD_CONFIG_PATH := $(BASE_SHARE_PATH)/config/zen
-include $(AMD_CONFIG_PATH)/$(AMD_CONFIG_FILE)
# Choose the -march option (and any companion flags) for the compiler in use.
# Each case appends the same flags to both CKVECFLAGS and CRVECFLAGS.
ifeq ($(CC_VENDOR),clang)
# It is not known which versions of clang added support for -march=znver1
# or -march=znver2, so no version differentiation like the one performed for
# gcc below is attempted; -march=znver1 is always used.
CKVECFLAGS += -march=znver1
CRVECFLAGS += -march=znver1
else ifneq ($(CC_VENDOR),gcc)
$(error gcc or clang are required for this configuration.)
else ifneq ($(GCC_OT_9_1_0),yes)
# gcc is at least 9.1.0, so the microarchitecture can be specified using
# the preferred option.
CKVECFLAGS += -march=znver2
CRVECFLAGS += -march=znver2
else ifeq ($(GCC_OT_6_1_0),yes)
# gcc is older than 6.1.0: -march=bdver4 must be used, followed by removal
# of the Bulldozer instruction sets that were omitted from Zen.
CKVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
CRVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
else
# gcc is older than 9.1.0 but at least 6.1.0: fall back to -march=znver1.
CKVECFLAGS += -march=znver1 -mno-avx256-split-unaligned-store
CRVECFLAGS += -march=znver1 -mno-avx256-split-unaligned-store
endif
# Copy all of the variables set above into new variables whose names
# contain the configuration name.
$(eval $(call store-make-defs,$(THIS_CONFIG)))

View File

@@ -10,7 +10,7 @@
# Processor families.
x86_64: intel64 amd64
intel64: skx knl haswell sandybridge penryn generic
amd64: zen excavator steamroller piledriver bulldozer generic
amd64: zen2 zen excavator steamroller piledriver bulldozer generic
# NOTE: ARM families will remain disabled until runtime hardware detection
# logic is added to BLIS.
#arm64: cortexa57 generic
@@ -24,6 +24,7 @@ sandybridge: sandybridge
penryn: penryn
# AMD architectures.
zen2: zen2/zen2/zen/haswell
zen: zen/zen/haswell
excavator: excavator/piledriver
steamroller: steamroller/piledriver

39
configure vendored
View File

@@ -5,7 +5,7 @@
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
# Copyright (C) 2019, Advanced Micro Devices, Inc.
# Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
@@ -1516,19 +1516,37 @@ check_compiler_version_ranges()
# comments:
# These older versions of gcc do not explicitly support the Zen (Zen1)
# microarchitecture; the newest microarchitectural value understood by
# these versions is '-march=bdver4' [3]. However, support for them can
# be attained in a roundabout way by starting with the instruction sets
# enabled by '-march=bdver4' and then disabling the instruction sets
# that were removed in the transition from Excavator to Zen, namely:
# FMA4, TBM, XOP, and LWP. Newer versions of gcc support Zen via the
# '-march=znver1' option [4].
# these versions is '-march=bdver4' [3]. However, basic support for these
# older versions can be attained in a roundabout way by starting with the
# instruction sets enabled by '-march=bdver4' and then disabling the
# instruction sets that were removed in the transition from Excavator to
# Zen, namely: FMA4, TBM, XOP, and LWP. Newer versions of gcc support Zen
# via the '-march=znver1' option [4].
#
# [3] https://gcc.gnu.org/onlinedocs/gcc-5.5.0/gcc/x86-Options.html#x86-Options
# [4] https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/x86-Options.html#x86-Options
#
# range: gcc < 9.1 (ie: 8.3 or older)
# variable: gcc_older_than_9_1_0
# comments:
# These older versions of gcc do not explicitly support the Zen2
# microarchitecture; the newest microarchitectural value understood by
# these versions is either '-march=znver1' (if !gcc_older_than_6_1_0) [5]
# or '-march=bdver4' (if gcc_older_than_6_1_0) [3]. If gcc is 6.1 or
# newer, '-march=znver1' may be used (since the instruction sets it
# enables are a subset of those enabled by '-march=znver2'); otherwise,
# '-march=bdver4' must be used in conjunction with disabling the
# instruction sets that were removed in the transition from Excavator to
# Zen, as described in the section above for gcc_older_than_6_1_0.
# Newer versions of gcc support Zen2 via the '-march=znver2' option [6].
#
# [5] https://gcc.gnu.org/onlinedocs/gcc-8.3.0/gcc/x86-Options.html#x86-Options
# [6] https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/x86-Options.html#x86-Options
#
gcc_older_than_4_9_0='no'
gcc_older_than_6_1_0='no'
gcc_older_than_9_1_0='no'
echo "${script_name}: checking ${cc} ${cc_version} against known consequential version ranges."
@@ -1548,6 +1566,12 @@ check_compiler_version_ranges()
echo "${script_name}: note: found ${cc} version older than 6.1."
gcc_older_than_6_1_0='yes'
fi
# Check for gcc < 9.1.0 (ie: 8.3 or older).
if [ ${cc_major} -lt 9 ]; then
echo "${script_name}: note: found ${cc} version older than 9.1."
gcc_older_than_9_1_0='yes'
fi
fi
# icc
@@ -3056,6 +3080,7 @@ main()
| sed -e "s/@CC_VENDOR@/${cc_vendor}/g" \
| sed -e "s/@gcc_older_than_4_9_0@/${gcc_older_than_4_9_0}/g" \
| sed -e "s/@gcc_older_than_6_1_0@/${gcc_older_than_6_1_0}/g" \
| sed -e "s/@gcc_older_than_9_1_0@/${gcc_older_than_9_1_0}/g" \
| sed -e "s/@CC@/${cc_esc}/g" \
| sed -e "s/@CXX@/${cxx_esc}/g" \
| sed -e "s/@RANLIB@/${ranlib_esc}/g" \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -57,7 +57,8 @@ void bli_gemm_front
#ifdef BLIS_ENABLE_SMALL_MATRIX
// Only handle small problems separately for homogeneous datatypes.
if ( bli_obj_dt( a ) == bli_obj_dt( b ) &&
bli_obj_dt( a ) == bli_obj_dt( c ) )
bli_obj_dt( a ) == bli_obj_dt( c ) &&
bli_obj_comp_prec( c ) == bli_obj_prec( c ) )
{
gint_t status = bli_gemm_small( alpha, a, b, beta, c, cntx, cntl );
if ( status == BLIS_SUCCESS ) return;

View File

@@ -44,6 +44,7 @@ void bli_gemm_front
cntl_t* cntl
);
#ifdef BLIS_ENABLE_SMALL_MATRIX
err_t bli_gemm_small
(
obj_t* alpha,
@@ -54,3 +55,5 @@ err_t bli_gemm_small
cntx_t* cntx,
cntl_t* cntl
);
#endif

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2017, Advanced Micro Devices, Inc.
Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -51,6 +52,21 @@ void bli_syrk_front
obj_t at_local;
obj_t c_local;
// Alias A and C in case we need to apply transformations.
bli_obj_alias_to( a, &a_local );
bli_obj_alias_to( c, &c_local );
bli_obj_set_as_root( &c_local );
// For syrk, the right-hand "B" operand is simply A^T.
bli_obj_alias_to( a, &at_local );
bli_obj_induce_trans( &at_local );
#ifdef BLIS_ENABLE_SMALL_MATRIX
gint_t status = bli_syrk_small( alpha, &a_local, &at_local, beta, &c_local,
cntx, cntl );
if ( status == BLIS_SUCCESS ) return;
#endif
// Check parameters.
if ( bli_error_checking_is_enabled() )
bli_syrk_check( alpha, a, beta, c, cntx );
@@ -62,15 +78,6 @@ void bli_syrk_front
return;
}
// Alias A and C in case we need to apply transformations.
bli_obj_alias_to( a, &a_local );
bli_obj_alias_to( c, &c_local );
bli_obj_set_as_root( &c_local );
// For syrk, the right-hand "B" operand is simply A^T.
bli_obj_alias_to( a, &at_local );
bli_obj_induce_trans( &at_local );
// An optimization: If C is stored by rows and the micro-kernel prefers
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the

View File

@@ -42,3 +42,17 @@ void bli_syrk_front
rntm_t* rntm,
cntl_t* cntl
);
// Small-matrix syrk entry point. Declared only when BLIS_ENABLE_SMALL_MATRIX
// is defined, matching the guard around its call in bli_syrk_front().
// bli_syrk_front() tries this routine first and returns immediately when the
// result is BLIS_SUCCESS; any other status falls through to the regular path.
#ifdef BLIS_ENABLE_SMALL_MATRIX
err_t bli_syrk_small
(
obj_t* alpha,
obj_t* a,
obj_t* b, // bli_syrk_front() passes the induced-transpose alias of a here
obj_t* beta,
obj_t* c,
cntx_t* cntx,
cntl_t* cntl
);
#endif

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -51,6 +52,11 @@ void bli_trsm_front
obj_t b_local;
obj_t c_local;
#ifdef BLIS_ENABLE_SMALL_MATRIX_TRSM
gint_t status = bli_trsm_small( side, alpha, a, b, cntx, cntl );
if ( status == BLIS_SUCCESS ) return;
#endif
// Check parameters.
if ( bli_error_checking_is_enabled() )
bli_trsm_check( side, alpha, a, b, &BLIS_ZERO, b, cntx );

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -42,3 +43,16 @@ void bli_trsm_front
rntm_t* rntm,
cntl_t* cntl
);
// Small-matrix trsm entry point. bli_trsm_front() tries this routine first
// and returns immediately when the result is BLIS_SUCCESS.
//
// The call in bli_trsm_front() is guarded by BLIS_ENABLE_SMALL_MATRIX_TRSM,
// so the prototype must be visible whenever that macro is defined; otherwise
// the call would compile against an implicit declaration. The original
// BLIS_ENABLE_SMALL_MATRIX guard is retained for backward compatibility.
#if defined(BLIS_ENABLE_SMALL_MATRIX) || defined(BLIS_ENABLE_SMALL_MATRIX_TRSM)
err_t bli_trsm_small
(
side_t side,
obj_t* alpha,
obj_t* a,
obj_t* b,
cntx_t* cntx,
cntl_t* cntl
);
#endif

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2018, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

Some files were not shown because too many files have changed in this diff Show More