mirror of
https://github.com/amd/blis.git
synced 2026-05-04 14:31:12 +00:00
Details: - NOTE: This is a merge commit of 'master' of git://github.com/amd/blis into 'amd-master' of flame/blis. - Fixed a bug in the downstream value of BLIS_NUM_ARCHS, which was inadvertently not incremented when the Zen2 subconfiguration was added. - In bli_gemm_front(), added a missing conditional constraint around the call to bli_gemm_small() that ensures that the computation precision of C matches the storage precision of C. - In bli_syrk_front(), reorganized and relocated the notrans/trans logic that existed around the call to bli_syrk_small() into bli_syrk_small() to minimize the calling code footprint and also to bring that code into stylistic harmony with similar code in bli_gemm_front() and bli_trsm_front(). Also, replaced direct accessing of obj_t fields with proper accessor static functions (e.g. 'a->dim[0]' becomes 'bli_obj_length( a )'). - Added #ifdef BLIS_ENABLE_SMALL_MATRIX guard around prototypes for bli_gemm_small(), bli_syrk_small(), and bli_trsm_small(). This is strictly speaking unnecessary, but it serves as a useful visual cue to those who may be reading the files. - Removed cpp macro-protected small matrix debugging code from bli_trsm_front.c. - Added a GCC_OT_9_1_0 variable to build/config.mk.in to facilitate gcc version check for availability of -march=znver2, and added appropriate support to configure script. - Cleanups to compiler flags common to recent AMD microarchitectures in config/zen/amd_config.mk, including: removal of -march=znver1 et al. from CKVECFLAGS (since the -march flag is added within make_defs.mk); setting CRVECFLAGS similarly to CKVECFLAGS. - Cleanups to config/zen/bli_cntx_init_zen.c. - Cleanups, added comments to config/zen/make_defs.mk. - Cleanups to config/zen2/make_defs.mk, including making use of newly-added GCC_OT_9_1_0 and existing GCC_OT_6_1_0 to choose the correct set of compiler flags based on the version of gcc being used. - Reverted downstream changes to test/test_gemm.c. 
- Various whitespace/comment changes.
84 lines
3.0 KiB
Makefile
84 lines
3.0 KiB
Makefile
#
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2019, Advanced Micro Devices, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name(s) of the copyright holder(s) nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

# All the common flags for AMD architectures are defined here.

# NOTE: The build system will append various general-purpose/
# configuration-agnostic flags to these variables in common.mk. You
# may specify additional flags here as needed.
# Baseline flag sets. Each one is reset to empty here (simple assignment) so
# that nothing inherited from the environment leaks in; add
# configuration-specific preprocessor, miscellaneous, PIC or warning flags
# to the matching variable as needed.
CPPROCFLAGS :=
CMISCFLAGS  :=
CPICFLAGS   :=
CWARNFLAGS  :=

# Debug symbols: request -g for every DEBUG_TYPE except exactly 'off'
# (an unset or empty DEBUG_TYPE therefore also enables -g). When DEBUG_TYPE
# is 'off', CDBGFLAGS is deliberately left untouched rather than cleared.
ifneq ($(DEBUG_TYPE),off)
CDBGFLAGS := -g
endif

# Optimization level: a 'noopt' debug build switches optimization off
# entirely; every other DEBUG_TYPE value (including unset) builds with -O3
# and without frame pointers.
ifeq ($(DEBUG_TYPE),noopt)
COPTFLAGS := -O0
else
COPTFLAGS := -O3 -fomit-frame-pointer
endif

# Flags specific to optimized kernels.
# The kernels are built with the same optimization level as the rest of the
# library. No -march flag is set here; per the change notes for this file it
# is added by the including make_defs.mk.
CKOPTFLAGS := $(COPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma
else
ifeq ($(CC_VENDOR),clang)
CKVECFLAGS := -mavx2 -mfpmath=sse -mfma
# When compiling with AOCC, add these flags to the default flags set above.
# The compiler prints its version banner on stderr, so it is folded into
# stdout with the POSIX '2>&1 |' form: the bash-only '|&' shorthand breaks
# whenever make runs $(shell ...) through a plain /bin/sh. The probe asks
# $(CC) instead of whichever 'clang' happens to be first on PATH.
# NOTE(review): assumes CC is the compiler CC_VENDOR was derived from.
ifeq ($(strip $(shell $(CC) -v 2>&1 | head -1 | grep -c 'AOCC.LLVM.2.0.0')),1)
CKVECFLAGS += -mllvm -disable-licm-vrp
endif
else
$(error gcc or clang are required for this configuration.)
endif
endif

# Flags specific to reference kernels.
# Reference kernels reuse the optimized-kernel optimization flags as-is.
CROPTFLAGS := $(CKOPTFLAGS)
ifneq ($(filter gcc clang,$(CC_VENDOR)),)
# gcc and clang take the same extra options: the kernel vector flags plus
# relaxed floating-point math and fast FP contraction.
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
# Any other vendor: fall back to the kernel vector flags unchanged.
CRVECFLAGS := $(CKVECFLAGS)
endif