mirror of
https://github.com/amd/blis.git
synced 2026-04-19 23:28:52 +00:00
Merge branch 'amd'
This commit is contained in:
2
LICENSE
2
LICENSE
@@ -15,7 +15,7 @@ copyright info. All parties provide their portions of the code under the
|
||||
|
||||
Copyright (C) 2018, The University of Texas at Austin
|
||||
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -92,6 +92,7 @@ CC := @CC@
|
||||
# Important C compiler ranges.
|
||||
GCC_OT_4_9_0 := @gcc_older_than_4_9_0@
|
||||
GCC_OT_6_1_0 := @gcc_older_than_6_1_0@
|
||||
GCC_OT_9_1_0 := @gcc_older_than_9_1_0@
|
||||
|
||||
# The C++ compiler. NOTE: A C++ is typically not needed.
|
||||
CXX := @CXX@
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2015, The University of Texas at Austin
|
||||
Copyright (C) 2017, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2018, The University of Texas at Austin
|
||||
# Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
# Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
|
||||
83
config/zen/amd_config.mk
Normal file
83
config/zen/amd_config.mk
Normal file
@@ -0,0 +1,83 @@
|
||||
#
|
||||
#
|
||||
# BLIS
|
||||
# An object-based framework for developing high-performance BLAS-like
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# - Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
|
||||
# All the common flags for AMD architectures will be added here
|
||||
|
||||
# NOTE: The build system will append these variables with various
|
||||
# general-purpose/configuration-agnostic flags in common.mk. You
|
||||
# may specify additional flags here as needed.
|
||||
CPPROCFLAGS :=
|
||||
CMISCFLAGS :=
|
||||
CPICFLAGS :=
|
||||
CWARNFLAGS :=
|
||||
|
||||
ifneq ($(DEBUG_TYPE),off)
|
||||
CDBGFLAGS := -g
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG_TYPE),noopt)
|
||||
COPTFLAGS := -O0
|
||||
else
|
||||
COPTFLAGS := -O3 -fomit-frame-pointer
|
||||
endif
|
||||
|
||||
# Flags specific to optimized kernels.
|
||||
CKOPTFLAGS := $(COPTFLAGS)
|
||||
ifeq ($(CC_VENDOR),gcc)
|
||||
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma
|
||||
else
|
||||
ifeq ($(CC_VENDOR),clang)
|
||||
#CKVECFLAGS := -mavx2 -mfpmath=sse -mfma -march=znver1 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
|
||||
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma
|
||||
# When compiling with AOCC, add these flags to the default flags set above.
|
||||
ifeq ($(strip $(shell clang -v |& head -1 | grep -c 'AOCC.LLVM.2.0.0')),1)
|
||||
CKVECFLAGS += -mllvm -disable-licm-vrp
|
||||
endif
|
||||
else
|
||||
$(error gcc or clang are required for this configuration.)
|
||||
endif
|
||||
endif
|
||||
|
||||
# Flags specific to reference kernels.
|
||||
CROPTFLAGS := $(CKOPTFLAGS)
|
||||
ifeq ($(CC_VENDOR),gcc)
|
||||
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
|
||||
else
|
||||
ifeq ($(CC_VENDOR),clang)
|
||||
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
|
||||
else
|
||||
CRVECFLAGS := $(CKVECFLAGS)
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -115,15 +115,37 @@ void bli_cntx_init_zen( cntx_t* cntx )
|
||||
// s d c z
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 );
|
||||
|
||||
/*
|
||||
Multi Instance performance improvement of DGEMM when binded to a CCX
|
||||
In Multi instance each thread runs a sequential DGEMM.
|
||||
|
||||
a) If BLIS is run in a multi-instance mode with
|
||||
CPU freq 2.6/2.2 Ghz
|
||||
DDR4 clock frequency 2400Mhz
|
||||
mc = 240, kc = 512, and nc = 2040
|
||||
has better performance on EPYC server, over the default block sizes.
|
||||
|
||||
b) If BLIS is run in Single Instance mode
|
||||
mc = 510, kc = 1024 and nc = 4080
|
||||
*/
|
||||
|
||||
#ifdef BLIS_ENABLE_ZEN_BLOCK_SIZES
|
||||
// Zen optmized level 3 cache block sizes
|
||||
#if BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 1020, 510, 510, 255 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 1024, 1024, 1024, 1024 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 );
|
||||
#else
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 2040, 1528 );
|
||||
#endif
|
||||
#else
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
|
||||
#endif
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 );
|
||||
#endif
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -43,13 +43,27 @@
|
||||
#define BLIS_THREAD_MAX_JR 1
|
||||
|
||||
#define BLIS_ENABLE_ZEN_BLOCK_SIZES
|
||||
//#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
|
||||
// This will select the threshold below which small matrix code will be called.
|
||||
#define BLIS_SMALL_MATRIX_THRES 700
|
||||
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
|
||||
#define BLIS_SMALL_K_RECT_MATRIX_THRES 128
|
||||
|
||||
#define BLIS_SMALL_MATRIX_THRES_TRSM 32768 //128(128+128) => m*(m+n)
|
||||
#define BLIS_SMALL_MATRIX_A_THRES_TRSM 128
|
||||
#define BLIS_SMALL_MATRIX_A_THRES_M_SYRK 96
|
||||
#define BLIS_SMALL_MATRIX_A_THRES_N_SYRK 128
|
||||
|
||||
//This macro will enable BLIS DGEMM to choose block sizes for a single instance mode
|
||||
#define BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES 0
|
||||
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES 250
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES 90
|
||||
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO 22
|
||||
|
||||
|
||||
//#endif
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2014, The University of Texas at Austin
|
||||
# Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
@@ -32,6 +33,9 @@
|
||||
#
|
||||
#
|
||||
|
||||
# FLAGS that are specific to the 'zen' architecture are added here.
|
||||
# FLAGS that are common for all the AMD architectures are present in
|
||||
# amd_config.mk.
|
||||
|
||||
# Declare the name of the current configuration and add it to the
|
||||
# running list of configurations included by common.mk.
|
||||
@@ -42,56 +46,38 @@ THIS_CONFIG := zen
|
||||
# --- Determine the C compiler and related flags ---
|
||||
#
|
||||
|
||||
# NOTE: The build system will append these variables with various
|
||||
# general-purpose/configuration-agnostic flags in common.mk. You
|
||||
# may specify additional flags here as needed.
|
||||
CPPROCFLAGS :=
|
||||
CMISCFLAGS :=
|
||||
CPICFLAGS :=
|
||||
CWARNFLAGS :=
|
||||
# Include the file containing common flags for all AMD architectures.
|
||||
AMD_CONFIG_FILE := amd_config.mk
|
||||
AMD_CONFIG_PATH := $(BASE_SHARE_PATH)/config/zen
|
||||
-include $(AMD_CONFIG_PATH)/$(AMD_CONFIG_FILE)
|
||||
|
||||
ifneq ($(DEBUG_TYPE),off)
|
||||
CDBGFLAGS := -g
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG_TYPE),noopt)
|
||||
COPTFLAGS := -O0
|
||||
else
|
||||
COPTFLAGS := -O3
|
||||
endif
|
||||
|
||||
# Flags specific to optimized kernels.
|
||||
CKOPTFLAGS := $(COPTFLAGS)
|
||||
ifeq ($(CC_VENDOR),gcc)
|
||||
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma -march=znver1
|
||||
ifeq ($(GCC_OT_6_1_0),yes)
|
||||
# If gcc is older than 6.1.0, we must use -march=bdver4 and then remove the
|
||||
# Bulldozer instruction sets that were omitted from Zen.
|
||||
# Additionally, if gcc is 4.9 (clang 3.5?) or newer, we may want to add
|
||||
# Zen-specific instructions back into the mix:
|
||||
# -mclzero -madx -mrdseed -mmwaitx -msha -mxsavec -mxsaves -mclflushopt -mpopcnt
|
||||
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
|
||||
ifeq ($(GCC_OT_6_1_0),yes)
|
||||
CRVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
|
||||
CKVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
|
||||
else
|
||||
# If gcc is at least 6.1.0, then we can specify the microarchitecture using
|
||||
# the preferred option.
|
||||
CRVECFLAGS += -march=znver1
|
||||
CKVECFLAGS += -march=znver1
|
||||
endif
|
||||
else
|
||||
ifeq ($(CC_VENDOR),clang)
|
||||
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
|
||||
# I couldn't find which versions of clang added support for -march=znver1,
|
||||
# so we don't even bother attempting the differentiation that appears in the
|
||||
# gcc branch above.
|
||||
CRVECFLAGS += -march=znver1
|
||||
CKVECFLAGS += -march=znver1
|
||||
else
|
||||
$(error gcc or clang are required for this configuration.)
|
||||
endif
|
||||
endif
|
||||
|
||||
# Flags specific to reference kernels.
|
||||
CROPTFLAGS := $(CKOPTFLAGS)
|
||||
ifeq ($(CC_VENDOR),gcc)
|
||||
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
|
||||
else
|
||||
ifeq ($(CC_VENDOR),clang)
|
||||
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
|
||||
else
|
||||
CRVECFLAGS := $(CKVECFLAGS)
|
||||
endif
|
||||
endif
|
||||
|
||||
# Store all of the variables here to new variables containing the
|
||||
# configuration name.
|
||||
$(eval $(call store-make-defs,$(THIS_CONFIG)))
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2017, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
||||
139
config/zen2/bli_cntx_init_zen2.c
Normal file
139
config/zen2/bli_cntx_init_zen2.c
Normal file
@@ -0,0 +1,139 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_cntx_init_zen2( cntx_t* cntx )
|
||||
{
|
||||
blksz_t blkszs[ BLIS_NUM_BLKSZS ];
|
||||
|
||||
// Set default kernel blocksizes and functions.
|
||||
bli_cntx_init_zen2_ref( cntx );
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// Update the context with optimized native gemm micro-kernels and
|
||||
// their storage preferences.
|
||||
bli_cntx_set_l3_nat_ukrs
|
||||
(
|
||||
8,
|
||||
// gemm
|
||||
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, TRUE,
|
||||
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_6x8, TRUE,
|
||||
BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, TRUE,
|
||||
BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, TRUE,
|
||||
// gemmtrsm_l
|
||||
BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_haswell_asm_6x16, TRUE,
|
||||
BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_haswell_asm_6x8, TRUE,
|
||||
// gemmtrsm_u
|
||||
BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_haswell_asm_6x16, TRUE,
|
||||
BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_haswell_asm_6x8, TRUE,
|
||||
cntx
|
||||
);
|
||||
|
||||
// Update the context with optimized level-1f kernels.
|
||||
bli_cntx_set_l1f_kers
|
||||
(
|
||||
4,
|
||||
// axpyf
|
||||
BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8,
|
||||
BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8,
|
||||
// dotxf
|
||||
BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8,
|
||||
BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8,
|
||||
cntx
|
||||
);
|
||||
|
||||
// Update the context with optimized level-1v kernels.
|
||||
bli_cntx_set_l1v_kers
|
||||
(
|
||||
10,
|
||||
// amaxv
|
||||
BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int,
|
||||
BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int,
|
||||
// axpyv
|
||||
|
||||
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10,
|
||||
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10,
|
||||
|
||||
// dotv
|
||||
BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int,
|
||||
BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int,
|
||||
// dotxv
|
||||
BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int,
|
||||
BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int,
|
||||
// scalv
|
||||
|
||||
BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10,
|
||||
BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10,
|
||||
|
||||
cntx
|
||||
);
|
||||
|
||||
// Initialize level-3 blocksize objects with architecture-specific values.
|
||||
// s d c z
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 );
|
||||
#if AOCL_BLIS_MULTIINSTANCE
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 );
|
||||
#else
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );
|
||||
#endif
|
||||
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
|
||||
|
||||
// Update the context with the current architecture's register and cache
|
||||
// blocksizes (and multiples) for native execution.
|
||||
bli_cntx_set_blkszs
|
||||
(
|
||||
BLIS_NAT, 7,
|
||||
// level-3
|
||||
BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR,
|
||||
BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR,
|
||||
BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR,
|
||||
BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR,
|
||||
BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR,
|
||||
// level-1f
|
||||
BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF,
|
||||
BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF,
|
||||
cntx
|
||||
);
|
||||
}
|
||||
|
||||
74
config/zen2/bli_family_zen2.h
Normal file
74
config/zen2/bli_family_zen2.h
Normal file
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019, Advanced Micro Devices, Inc
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLI_FAMILY_ZEN2_
|
||||
#define BLI_FAMILY_ZEN2_
|
||||
|
||||
// By default, it is effective to parallelize the outer loops.
|
||||
// Setting these macros to 1 will force JR and IR inner loops
|
||||
// to be not paralleized.
|
||||
#define BLIS_THREAD_MAX_IR 1
|
||||
#define BLIS_THREAD_MAX_JR 1
|
||||
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
|
||||
// This will select the threshold below which small matrix code will be called.
|
||||
#define BLIS_SMALL_MATRIX_THRES 700
|
||||
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
|
||||
#define BLIS_SMALL_K_RECT_MATRIX_THRES 128
|
||||
|
||||
#define BLIS_SMALL_MATRIX_THRES_TRSM 32768 //128(128+128) => m*(m+n)
|
||||
#define BLIS_SMALL_MATRIX_A_THRES_TRSM 128
|
||||
#define BLIS_SMALL_MATRIX_A_THRES_M_SYRK 96
|
||||
#define BLIS_SMALL_MATRIX_A_THRES_N_SYRK 128
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
#define BLIS_SMALL_MATRIX_THRES_ROME 400
|
||||
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME 120
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 60
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150
|
||||
|
||||
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO 22
|
||||
|
||||
// When running HPL with pure MPI without DGEMM threading (Single-threaded
|
||||
// BLIS), defining this macro as 1 yields better performance.
|
||||
#define AOCL_BLIS_MULTIINSTANCE 0
|
||||
|
||||
#endif
|
||||
|
||||
88
config/zen2/make_defs.mk
Normal file
88
config/zen2/make_defs.mk
Normal file
@@ -0,0 +1,88 @@
|
||||
#
|
||||
#
|
||||
# BLIS
|
||||
# An object-based framework for developing high-performance BLAS-like
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2014, The University of Texas at Austin
|
||||
# Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# - Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
|
||||
# FLAGS that are specific to the 'zen2' architecture are added here.
|
||||
# FLAGS that are common for all the AMD architectures are present in
|
||||
# config/zen/amd_config.mk.
|
||||
|
||||
# Declare the name of the current configuration and add it to the
|
||||
# running list of configurations included by common.mk.
|
||||
THIS_CONFIG := zen2
|
||||
#CONFIGS_INCL += $(THIS_CONFIG)
|
||||
|
||||
#
|
||||
# --- Determine the C compiler and related flags ---
|
||||
#
|
||||
|
||||
# Include file containing common flags for all AMD architectures.
|
||||
AMD_CONFIG_FILE := amd_config.mk
|
||||
AMD_CONFIG_PATH := $(BASE_SHARE_PATH)/config/zen
|
||||
-include $(AMD_CONFIG_PATH)/$(AMD_CONFIG_FILE)
|
||||
|
||||
ifeq ($(CC_VENDOR),gcc)
|
||||
ifeq ($(GCC_OT_9_1_0),yes)
|
||||
ifeq ($(GCC_OT_6_1_0),yes)
|
||||
# If gcc is older than 6.1.0, we must use -march=bdver4 and then remove the
|
||||
# Bulldozer instruction sets that were omitted from Zen.
|
||||
CRVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
|
||||
CKVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
|
||||
else
|
||||
# If gcc is older than 9.1.0 but at least 6.1.0, then we can use -march=znver1
|
||||
# as the fallback option.
|
||||
CRVECFLAGS += -march=znver1 -mno-avx256-split-unaligned-store
|
||||
CKVECFLAGS += -march=znver1 -mno-avx256-split-unaligned-store
|
||||
endif
|
||||
else
|
||||
# If gcc is at least 9.1.0, then we can specify the microarchitecture using
|
||||
# the preferred option.
|
||||
CRVECFLAGS += -march=znver2
|
||||
CKVECFLAGS += -march=znver2
|
||||
endif
|
||||
else
|
||||
ifeq ($(CC_VENDOR),clang)
|
||||
# I couldn't find which versions of clang added support for -march=znver1
|
||||
# or -march=znver2, so we don't even bother attempting the differentiation
|
||||
# that appears in the gcc branch above.
|
||||
CRVECFLAGS += -march=znver1
|
||||
CKVECFLAGS += -march=znver1
|
||||
else
|
||||
$(error gcc or clang are required for this configuration.)
|
||||
endif
|
||||
endif
|
||||
|
||||
# Store all of the variables here to new variables containing the
|
||||
# configuration name.
|
||||
$(eval $(call store-make-defs,$(THIS_CONFIG)))
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
# Processor families.
|
||||
x86_64: intel64 amd64
|
||||
intel64: skx knl haswell sandybridge penryn generic
|
||||
amd64: zen excavator steamroller piledriver bulldozer generic
|
||||
amd64: zen2 zen excavator steamroller piledriver bulldozer generic
|
||||
# NOTE: ARM families will remain disabled until runtime hardware detection
|
||||
# logic is added to BLIS.
|
||||
#arm64: cortexa57 generic
|
||||
@@ -24,6 +24,7 @@ sandybridge: sandybridge
|
||||
penryn: penryn
|
||||
|
||||
# AMD architectures.
|
||||
zen2: zen2/zen2/zen/haswell
|
||||
zen: zen/zen/haswell
|
||||
excavator: excavator/piledriver
|
||||
steamroller: steamroller/piledriver
|
||||
|
||||
39
configure
vendored
39
configure
vendored
@@ -5,7 +5,7 @@
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2014, The University of Texas at Austin
|
||||
# Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
# Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
@@ -1516,19 +1516,37 @@ check_compiler_version_ranges()
|
||||
# comments:
|
||||
# These older versions of gcc do not explicitly support the Zen (Zen1)
|
||||
# microarchitecture; the newest microarchitectural value understood by
|
||||
# these versions is '-march=bdver4' [3]. However, support for them can
|
||||
# be attained in a roundabout way by starting with the instruction sets
|
||||
# enabled by '-march=bdver4' and then disabling the instruction sets
|
||||
# that were removed in the transition from Excavator to Zen, namely:
|
||||
# FMA4, TBM, XOP, and LWP. Newer versions of gcc support Zen via the
|
||||
# '-march=znver1' option [4].
|
||||
# these versions is '-march=bdver4' [3]. However, basic support for these
|
||||
# older versions can be attained in a roundabout way by starting with the
|
||||
# instruction sets enabled by '-march=bdver4' and then disabling the
|
||||
# instruction sets that were removed in the transition from Excavator to
|
||||
# Zen, namely: FMA4, TBM, XOP, and LWP. Newer versions of gcc support Zen
|
||||
# via the '-march=znver1' option [4].
|
||||
#
|
||||
# [3] https://gcc.gnu.org/onlinedocs/gcc-5.5.0/gcc/x86-Options.html#x86-Options
|
||||
# [4] https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/x86-Options.html#x86-Options
|
||||
#
|
||||
# range: gcc < 9.1 (ie: 8.3 or older)
|
||||
# variable: gcc_older_than_9_1_0
|
||||
# comments:
|
||||
# These older versions of gcc do not explicitly support the Zen2
|
||||
# microarchitecture; the newest microarchitectural value understood by
|
||||
# these versions is either '-march=znver1' (if !gcc_older_than_6_1_0) [5]
|
||||
# or '-march=bdver4' (if gcc_older_than_6_1_0) [3]. If gcc is 6.1 or
|
||||
# newer, '-march=znver1' may be used (since the instruction sets it
|
||||
# enables are a subset of those enabled by '-march=znver2'); otherwise,
|
||||
# '-march=bdver4' must be used in conjuction with disabling the
|
||||
# instruction sets that were removed in the transition from Excavator to
|
||||
# Zen, as described in the section above for gcc_older_than_6_1_0.
|
||||
# Newer versions of gcc support Zen2 via the '-march=znver2' option [6].
|
||||
#
|
||||
# [5] https://gcc.gnu.org/onlinedocs/gcc-8.3.0/gcc/x86-Options.html#x86-Options
|
||||
# [6] https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/x86-Options.html#x86-Options
|
||||
#
|
||||
|
||||
gcc_older_than_4_9_0='no'
|
||||
gcc_older_than_6_1_0='no'
|
||||
gcc_older_than_9_1_0='no'
|
||||
|
||||
echo "${script_name}: checking ${cc} ${cc_version} against known consequential version ranges."
|
||||
|
||||
@@ -1548,6 +1566,12 @@ check_compiler_version_ranges()
|
||||
echo "${script_name}: note: found ${cc} version older than 6.1."
|
||||
gcc_older_than_6_1_0='yes'
|
||||
fi
|
||||
|
||||
# Check for gcc < 9.1.0 (ie: 8.3 or older).
|
||||
if [ ${cc_major} -lt 9 ]; then
|
||||
echo "${script_name}: note: found ${cc} version older than 9.1."
|
||||
gcc_older_than_9_1_0='yes'
|
||||
fi
|
||||
fi
|
||||
|
||||
# icc
|
||||
@@ -3056,6 +3080,7 @@ main()
|
||||
| sed -e "s/@CC_VENDOR@/${cc_vendor}/g" \
|
||||
| sed -e "s/@gcc_older_than_4_9_0@/${gcc_older_than_4_9_0}/g" \
|
||||
| sed -e "s/@gcc_older_than_6_1_0@/${gcc_older_than_6_1_0}/g" \
|
||||
| sed -e "s/@gcc_older_than_9_1_0@/${gcc_older_than_9_1_0}/g" \
|
||||
| sed -e "s/@CC@/${cc_esc}/g" \
|
||||
| sed -e "s/@CXX@/${cxx_esc}/g" \
|
||||
| sed -e "s/@RANLIB@/${ranlib_esc}/g" \
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -57,7 +57,8 @@ void bli_gemm_front
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
// Only handle small problems separately for homogeneous datatypes.
|
||||
if ( bli_obj_dt( a ) == bli_obj_dt( b ) &&
|
||||
bli_obj_dt( a ) == bli_obj_dt( c ) )
|
||||
bli_obj_dt( a ) == bli_obj_dt( c ) &&
|
||||
bli_obj_comp_prec( c ) == bli_obj_prec( c ) )
|
||||
{
|
||||
gint_t status = bli_gemm_small( alpha, a, b, beta, c, cntx, cntl );
|
||||
if ( status == BLIS_SUCCESS ) return;
|
||||
|
||||
@@ -44,6 +44,7 @@ void bli_gemm_front
|
||||
cntl_t* cntl
|
||||
);
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
err_t bli_gemm_small
|
||||
(
|
||||
obj_t* alpha,
|
||||
@@ -54,3 +55,5 @@ err_t bli_gemm_small
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl
|
||||
);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2017, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -51,6 +52,21 @@ void bli_syrk_front
|
||||
obj_t at_local;
|
||||
obj_t c_local;
|
||||
|
||||
// Alias A and C in case we need to apply transformations.
|
||||
bli_obj_alias_to( a, &a_local );
|
||||
bli_obj_alias_to( c, &c_local );
|
||||
bli_obj_set_as_root( &c_local );
|
||||
|
||||
// For syrk, the right-hand "B" operand is simply A^T.
|
||||
bli_obj_alias_to( a, &at_local );
|
||||
bli_obj_induce_trans( &at_local );
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
gint_t status = bli_syrk_small( alpha, &a_local, &at_local, beta, &c_local,
|
||||
cntx, cntl );
|
||||
if ( status == BLIS_SUCCESS ) return;
|
||||
#endif
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_syrk_check( alpha, a, beta, c, cntx );
|
||||
@@ -62,15 +78,6 @@ void bli_syrk_front
|
||||
return;
|
||||
}
|
||||
|
||||
// Alias A and C in case we need to apply transformations.
|
||||
bli_obj_alias_to( a, &a_local );
|
||||
bli_obj_alias_to( c, &c_local );
|
||||
bli_obj_set_as_root( &c_local );
|
||||
|
||||
// For syrk, the right-hand "B" operand is simply A^T.
|
||||
bli_obj_alias_to( a, &at_local );
|
||||
bli_obj_induce_trans( &at_local );
|
||||
|
||||
// An optimization: If C is stored by rows and the micro-kernel prefers
|
||||
// contiguous columns, or if C is stored by columns and the micro-kernel
|
||||
// prefers contiguous rows, transpose the entire operation to allow the
|
||||
|
||||
@@ -42,3 +42,17 @@ void bli_syrk_front
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl
|
||||
);
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
err_t bli_syrk_small
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* b,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl
|
||||
);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -51,6 +52,11 @@ void bli_trsm_front
|
||||
obj_t b_local;
|
||||
obj_t c_local;
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
gint_t status = bli_trsm_small( side, alpha, a, b, cntx, cntl );
|
||||
if ( status == BLIS_SUCCESS ) return;
|
||||
#endif
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_trsm_check( side, alpha, a, b, &BLIS_ZERO, b, cntx );
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -42,3 +43,16 @@ void bli_trsm_front
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl
|
||||
);
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
err_t bli_trsm_small
|
||||
(
|
||||
side_t side,
|
||||
obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* b,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl
|
||||
);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user