Files
blis/test/Makefile
Field G. Van Zee 16813335bd Merge branch 'amd' into rt
Details:
- Merged contributions made by AMD via 'amd' branch (see summary below).
  Special thanks to AMD for their contributions to-date, especially with
  regard to intrinsic- and assembly-based kernels.
- Added column storage output cases to microkernels in
  bli_gemm_zen_asm_d6x8.c and bli_gemmtrsm_l_zen_asm_d6x8.c. Even with
  the extra cost of transposing the microtile in registers, this is
  much faster than using the general storage case when the underlying
  matrix is column-stored.
- Added s and d assembly-based zen gemmtrsm_u microkernel (including
  column storage optimization mentioned above).
- Updated zen sub-configuration to reflect presence of new native
  kernels.
- Temporarily reverted zen sub-configuration's level-3 cache blocksizes
  to smaller haswell values.
- Temporarily disabled small matrix handling for zen configuration
  family in config/zen/bli_family_zen.h.
- Updated zen CFLAGS according to changes in 1e4365b.
- Updated haswell microkernels such that:
  - only one vzeroupper instruction is called prior to returning
  - movapd/movupd are used in lieu of movaps/movups for double-real
    microkernels. (Note that single-real microkernels still use
    movaps/movups.)
- Added kernel prototypes to kernels/zen/bli_kernels_zen.h, which is
  now included via frame/include/bli_arch_config.h.
- Minor updates to bli_amaxv_ref.c (and to inlined "test" implementation
  in testsuite/src/test_amaxv.c).
- Added early return for alpha == 0 in bli_dotxv_ref.c.
- Integrated changes from f07b176, including a fix for undefined
  behavior when executing the 1m method under certain conditions.
- Updated config_registry; no longer need haswell kernels for zen
  sub-configuration.
- Tweaked marginal and pass thresholds for dotxf.
- Reformatted level-1v, -1f, and -3 amd kernels and inserted additional
  comments.
- Updated LICENSE file to explicitly mention that parts are copyright
  UT-Austin and AMD.
- Added AMD copyright to header templates in build/templates.

Summary of previous changes from 'amd' branch.
- Added s and d assembly-based zen gemm microkernels (d6x8 and d8x6) and
  s and d assembly-based zen gemmtrsm_l microkernels (d6x8).
- Added s and d intrinsics-based zen kernels for amaxv, axpyv, dotv, dotxv,
  and scalv, with extra-unrolling variants for axpyv and scalv.
- Added a small matrix handler to bli_gemm_front(), with the handler
  implemented in kernels/zen/3/bli_gemm_small_matrix.c.
- Added additional logic to sumsqv that first attempts to compute the
  sum of the squares via dotv(). If there is a floating-point exception
  (FE_OVERFLOW), then the previous (numerically conservative) code is
  used; otherwise, the result of dotv() is square-rooted and stored as
  the result. This new implementation is only enabled when FE_OVERFLOW
  is #defined. If the macro is not #defined, then the previous
  implementation is used.
- Added axpyv and dotv standalone test drivers to test directory.
- Added zen support to old cpuid_x86.c driver in build/auto-detect/old.
- Added thread-local and __attribute__-related macros to bli_macro_defs.h.
2018-02-21 17:43:32 -06:00

331 lines
8.9 KiB
Makefile

#
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
# Copyright (C) 2017, Advanced Micro Devices, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name of The University of Texas at Austin nor the names
# of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
#
# Makefile
#
# Field G. Van Zee
#
# Makefile for standalone BLIS test drivers.
#
#
# --- Makefile PHONY target definitions ----------------------------------------
#
# Every name listed here is an action or a group of test drivers rather than
# a file on disk. 'essl' and 'mac' are declared together with the other
# library groups so that a stray file of the same name in this directory can
# never make those targets appear up to date.
.PHONY: all \
        blis openblas atlas mkl essl mac \
        clean cleanx
#
# --- Distribution path override -----------------------------------------------
#
# Replace the default DIST_PATH and BUILD_PATH values with the parent
# directory so that make can locate the source distribution and the build
# location when it is run from here.
DIST_PATH  := ..
BUILD_PATH := ..
#
# --- Include common makefile definitions --------------------------------------
#
# File name of the common makefile fragment, and the path at which it is
# expected (relative to the distribution path). The fragment is the makefile
# configuration that the configure script generates.
COMMON_MK_FILE := common.mk
COMMON_MK_PATH := $(DIST_PATH)/$(COMMON_MK_FILE)
# Read the fragment. The leading '-' keeps make from complaining when the file
# does not exist.
-include $(COMMON_MK_PATH)
# Record whether the fragment was really read. Start from 'no' and switch to
# 'yes' only if COMMON_MK_INCLUDED holds exactly 'yes'; when it does not, the
# most likely explanation is that configure has not been run yet.
COMMON_MK_PRESENT := no
ifeq ($(strip $(COMMON_MK_INCLUDED)),yes)
COMMON_MK_PRESENT := yes
endif
#
# --- BLAS and LAPACK implementations ------------------------------------------
#
# Where BLIS was installed: the library and header directories are both
# derived from INSTALL_PREFIX.
BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib
BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis
# The BLIS static library archive.
BLIS_LIB      := $(BLIS_LIB_PATH)/libblis.a
# Directories in which the other BLAS implementations reside. Edit these to
# match the local installation (the ESSL path is a placeholder).
BLAS_LIB_PATH := $(HOME)/flame/lib
#MKL_LIB_PATH  := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64
MKL_LIB_PATH  := $(HOME)/intel/mkl/lib/intel64
ESSL_LIB_PATH := $(HOME)/path/to/essl/changeme
# OpenBLAS: one static archive under BLAS_LIB_PATH.
OPENBLAS_LIB  := $(BLAS_LIB_PATH)/libopenblas.a
# ATLAS: two static archives under BLAS_LIB_PATH, listed in link order.
ATLAS_LIB     := $(BLAS_LIB_PATH)/libf77blas.a $(BLAS_LIB_PATH)/libatlas.a
# MKL: linker options that search MKL_LIB_PATH and name the MKL libraries,
# followed by the system libraries they are linked with here.
MKL_LIB       := -L$(MKL_LIB_PATH) -lmkl_intel_lp64 -lmkl_core -lmkl_sequential \
                 -lpthread -lm -ldl
# ESSL
# Note: ESSL is named differently for SMP and/or BG. Choose the variant by
# enabling exactly one ESSL_TYPE assignment below:
#
#   (empty)  the 32b library on POWER
#   6464     the 64b library on POWER
#   bg       the 32b single-threaded library on Blue Gene
#   smpbg    the 32b multi-threaded library on Blue Gene
#
# The descriptions are deliberately kept off the assignment lines: make
# retains any whitespace that sits between a value and a trailing '#' comment,
# and that whitespace would end up inside the library file name built below.
ESSL_TYPE     :=
#ESSL_TYPE     := 6464
#ESSL_TYPE     := bg
#ESSL_TYPE     := smpbg
ESSL_LIB      := $(ESSL_LIB_PATH)/libessl$(ESSL_TYPE).a
# Accelerate: linked as a framework rather than as a library file.
MAC_LIB       := -framework Accelerate
#
# --- General build definitions ------------------------------------------------
#
# Test driver sources are read from, and objects written to, the current
# directory.
TEST_SRC_PATH := .
TEST_OBJ_PATH := .
# Translate each C source found in the source directory into the name of the
# object file it corresponds to.
TEST_OBJS     := $(patsubst $(TEST_SRC_PATH)/%.c, $(TEST_OBJ_PATH)/%.o, $(wildcard $(TEST_SRC_PATH)/*.c))
# Uncomment the next line to replace the CFLAGS obtained from make_defs.mk.
#CFLAGS        := -g -O2 -march=native
# Let the compiler find the installed BLIS headers and the local headers.
CFLAGS        += -I$(BLIS_INC_PATH) -I$(TEST_SRC_PATH)
# Linking is done through the C compiler.
LINKER        := $(CC)
# Linker flags start out empty (a library search path may be enabled in the
# commented portion) and are then extended with the libraries and the OpenMP
# flag that every test driver is linked with.
LDFLAGS       := #-L/home/00146/field/gnu/gcc-4.8.2/lib64
LDFLAGS       += -lgfortran -lm -lpthread -fopenmp
#
# --- Targets/rules ------------------------------------------------------------
#
# 'all' builds the driver groups listed as its prerequisites. Any of the
# following group targets may be listed:
#
#   blis openblas atlas mkl mac essl
#
#all: blis openblas atlas mkl
all: blis openblas mkl
# Operations covered by every driver group. Each group target below depends on
# one executable per entry, named test_<op>_<library>.x. Keeping the list in a
# single place guarantees that all groups build the same set of drivers, in
# the same order. The list is split over two lines purely for readability.
TEST_DRIVER_OPS := dotv axpyv gemv ger hemv her her2 trmv trsv \
                   gemm hemm herk her2k trmm trsm
# One group target per library. The substitution reference wraps each
# operation name in the test_..._<library>.x executable name.
blis:     $(TEST_DRIVER_OPS:%=test_%_blis.x)
openblas: $(TEST_DRIVER_OPS:%=test_%_openblas.x)
atlas:    $(TEST_DRIVER_OPS:%=test_%_atlas.x)
mkl:      $(TEST_DRIVER_OPS:%=test_%_mkl.x)
essl:     $(TEST_DRIVER_OPS:%=test_%_essl.x)
mac:      $(TEST_DRIVER_OPS:%=test_%_mac.x)
# -- Object file rules --
# Generic rule: compile a C source in the source directory into the object of
# the same name in the object directory.
$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c
	$(CC) $(CFLAGS) -c $< -o $@
# Library-specific rules: the same driver source test_<op>.c is compiled once
# per library. For the BLAS libraries, the BLAS macro is defined as a string
# literal holding the library's name.
test_%_openblas.o: test_%.c
	$(CC) $(CFLAGS) -DBLAS=\"openblas\" -c $< -o $@
test_%_atlas.o: test_%.c
	$(CC) $(CFLAGS) -DBLAS=\"atlas\" -c $< -o $@
test_%_mkl.o: test_%.c
	$(CC) $(CFLAGS) -DBLAS=\"mkl\" -c $< -o $@
test_%_essl.o: test_%.c
	$(CC) $(CFLAGS) -DBLAS=\"essl\" -c $< -o $@
test_%_mac.o: test_%.c
	$(CC) $(CFLAGS) -DBLAS=\"mac\" -c $< -o $@
# The BLIS build of a driver defines BLIS (with no value) instead of BLAS.
test_%_blis.o: test_%.c
	$(CC) $(CFLAGS) -DBLIS -c $< -o $@
# -- Executable file rules --
# Each executable is linked from its single object file ($<) and depends on
# the installed BLIS library as well, so it is relinked when that changes.
# NOTE: For the BLAS test drivers, the BLAS library under test is placed
# before BLIS on the link command line. If BLIS was configured with the BLAS
# compatibility layer, this ordering keeps BLIS from inadvertently being
# called for the BLAS routines that the driver is meant to test.
test_%_openblas.x: test_%_openblas.o $(BLIS_LIB)
	$(LINKER) $< $(OPENBLAS_LIB) $(BLIS_LIB) $(LDFLAGS) -o $@
test_%_atlas.x: test_%_atlas.o $(BLIS_LIB)
	$(LINKER) $< $(ATLAS_LIB) $(BLIS_LIB) $(LDFLAGS) -o $@
test_%_mkl.x: test_%_mkl.o $(BLIS_LIB)
	$(LINKER) $< $(MKL_LIB) $(BLIS_LIB) $(LDFLAGS) -o $@
test_%_essl.x: test_%_essl.o $(BLIS_LIB)
	$(LINKER) $< $(ESSL_LIB) $(BLIS_LIB) $(LDFLAGS) -o $@
test_%_mac.x: test_%_mac.o $(BLIS_LIB)
	$(LINKER) $< $(MAC_LIB) $(BLIS_LIB) $(LDFLAGS) -o $@
# The BLIS drivers link against BLIS only.
test_%_blis.x: test_%_blis.o $(BLIS_LIB)
	$(LINKER) $< $(BLIS_LIB) $(LDFLAGS) -o $@
# -- Clean rules --
# 'clean' has no recipe of its own; it simply triggers 'cleanx'.
clean: cleanx
# Remove every object file and test executable in the current directory. The
# leading '-' tells make to continue even if the removal command fails.
# NOTE(review): RM_F is not defined in this file; presumably it is provided by
# the included common makefile fragment -- confirm.
cleanx:
	- $(RM_F) *.o *.x