Created a 'thunderx2' subdirectory within test/studies

Details:
- Created a 'thunderx2' subdirectory within test/studies to house
  the various level-3 test drivers used to measure performance on
  ThunderX2.
This commit is contained in:
Devangi N. Parikh
2018-09-20 14:38:50 -04:00
parent d7537fb51d
commit 02adab427c
15 changed files with 3261 additions and 0 deletions

View File

@@ -0,0 +1,654 @@
#!/bin/bash
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name of The University of Texas at Austin nor the names
# of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
#
# Makefile
#
# Field G. Van Zee
#
# Makefile for standalone BLIS test drivers.
#
#
# --- Makefile PHONY target definitions ----------------------------------------
#
# Every name below is a command-style target (it never corresponds to a file
# on disk), so it is declared phony. The list covers the aggregate targets,
# the per-library targets, and each per-operation target that has a rule in
# this file.
.PHONY: all all-st all-mt \
        blis-st blis-mt openblas-st openblas-mt \
        mkl-st mkl-mt armpl-st armpl-mt \
        blis-gemm-st openblas-gemm-st mkl-gemm-st armpl-gemm-st \
        blis-gemm-mt openblas-gemm-mt mkl-gemm-mt armpl-gemm-mt \
        blis-syrk-st openblas-syrk-st mkl-syrk-st armpl-syrk-st \
        blis-syrk-mt openblas-syrk-mt mkl-syrk-mt armpl-syrk-mt \
        blis-hemm-st openblas-hemm-st mkl-hemm-st armpl-hemm-st \
        blis-hemm-mt openblas-hemm-mt mkl-hemm-mt armpl-hemm-mt \
        blis-trmm-st openblas-trmm-st mkl-trmm-st armpl-trmm-st \
        blis-trmm-mt openblas-trmm-mt mkl-trmm-mt armpl-trmm-mt \
        clean cleanx
# Where to find BLIS:
# - With BLIS_INSTALL_PATH unset (or blank) we build against the source tree
#   three levels up. LIB_PATH and INC_PATH use deferred (=) assignment there
#   because CONFIG_NAME is not yet set at this point in the file.
# - With BLIS_INSTALL_PATH given we use the installed lib/include/share
#   directories; DIST_PATH is assumed not to exist in that case and is not set.
ifeq ($(strip $(BLIS_INSTALL_PATH)),)
DIST_PATH  := ../../..
SHARE_PATH := ../../..
LIB_PATH    = ../../../lib/$(CONFIG_NAME)
INC_PATH    = ../../../include/$(CONFIG_NAME)
else
SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis
LIB_PATH   := $(BLIS_INSTALL_PATH)/lib
INC_PATH   := $(BLIS_INSTALL_PATH)/include/blis
endif
#
# --- Include common makefile definitions --------------------------------------
#
# Include the common makefile fragment found under SHARE_PATH. The leading '-'
# tells make to carry on without an error if the file cannot be read.
# NOTE(review): CONFIG_NAME, LIBBLIS_L, LINKER, LDFLAGS, RM_F and
# get-frame-cflags-for are used later in this file but are not defined in it;
# presumably they are provided by this fragment -- confirm.
-include $(SHARE_PATH)/common.mk
#
# --- BLAS and LAPACK implementations ------------------------------------------
#

# Header directory of an installed BLIS, relative to INSTALL_PREFIX. The
# companion library-path/library variables are currently disabled:
#BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib
#BLIS_LIB      := $(BLIS_LIB_PATH)/libblis.a
BLIS_INC_PATH  := $(INSTALL_PREFIX)/include/blis

# Directories holding the reference BLAS libraries we compare against.
# HOME_LIB_PATH must be assigned first: the ACML paths are derived from it
# with immediate (:=) expansion.
HOME_LIB_PATH  := $(HOME)/OpenBLAS/lib
MKL_LIB_PATH   := $(MKLROOT)/lib/intel64
ARMPL_LIB_PATH := /opt/arm/armpl-18.4.0_ThunderX2CN99_Ubuntu-16.04_gcc_7.1.0_aarch64-linux/lib/
ACML_LIB_PATH  := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_int64/lib
ACMLP_LIB_PATH := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_mp_int64/lib

# OpenBLAS: the single- and multi-threaded drivers link the same archive.
OPENBLAS_LIB   := $(HOME_LIB_PATH)/libopenblas.a
OPENBLASP_LIB  := $(HOME_LIB_PATH)/libopenblas.a

# ATLAS: Fortran-77 BLAS interface followed by the ATLAS core.
ATLAS_LIB      := $(HOME_LIB_PATH)/libf77blas.a $(HOME_LIB_PATH)/libatlas.a

# Arm Performance Libraries: sequential (lp64) and multi-threaded (lp64_mp).
ARMPL_LIB      := $(ARMPL_LIB_PATH)/libarmpl_lp64.a
ARMPLP_LIB     := $(ARMPL_LIB_PATH)/libarmpl_lp64_mp.a

# MKL, sequential flavour.
MKL_LIB        := -L$(MKL_LIB_PATH) -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm -ldl

# MKL, threaded flavour using the GNU OpenMP threading layer. An alternative
# link line based on mkl_intel_thread / mkl_intel_ilp64 / iomp5 (located via
# ICC_LIB_PATH) existed here in commented-out form and remains disabled.
MKLP_LIB       := -L$(MKL_LIB_PATH) -lmkl_intel_lp64 -lmkl_core -lmkl_gnu_thread -lpthread -lm -ldl -fopenmp

# ACML, sequential and multi-threaded (_mp) flavours.
ACML_LIB       := -L$(ACML_LIB_PATH) -lgfortran -lm -lrt -ldl -lacml
ACMLP_LIB      := -L$(ACMLP_LIB_PATH) -lgfortran -lm -lrt -ldl -lacml_mp
#
# --- General build definitions ------------------------------------------------
#

# Sources are read from, and objects written to, the current directory.
TEST_SRC_PATH  := .
TEST_OBJ_PATH  := .

# One object name per local .c file, sorted (sort also drops duplicates).
TEST_OBJS      := $(sort $(addprefix $(TEST_OBJ_PATH)/, \
                    $(notdir $(patsubst %.c,%.o,$(wildcard $(TEST_SRC_PATH)/*.c)))))

# Start from the "framework" CFLAGS of the configuration family, then add
# debug info and the local header search path.
CFLAGS         := $(call get-frame-cflags-for,$(CONFIG_NAME)) -g -I$(TEST_SRC_PATH)
# Locate the libblis library to which we will link. The link rules below
# reference $(LIBBLIS_LINK), so the variable must carry exactly that name
# (it was previously misspelled with a lowercase leading 'l', which defined
# a variable nothing reads). '?=' keeps any value already set before this
# point (for example by an included makefile fragment) and only supplies
# this path as a fallback.
LIBBLIS_LINK   ?= $(LIB_PATH)/$(LIBBLIS_L)
# Preprocessor definitions handed to the test drivers at compile time.

# DT: the BLIS datatype under test (s, d, c, z).
DT_S      := -DDT=BLIS_FLOAT
DT_D      := -DDT=BLIS_DOUBLE
DT_C      := -DDT=BLIS_SCOMPLEX
DT_Z      := -DDT=BLIS_DCOMPLEX

# BLIS / BLAS: which library interface the driver is compiled against.
BLI_DEF   := -DBLIS
BLA_DEF   := -DBLAS

# IND: the complex implementation (induced method) type.
D3MHW     := -DIND=BLIS_3MH
D3M1      := -DIND=BLIS_3M1
D4MHW     := -DIND=BLIS_4MH
D4M1B     := -DIND=BLIS_4M1B
D4M1A     := -DIND=BLIS_4M1A
D1M       := -DIND=BLIS_1M
DNAT      := -DIND=BLIS_NAT

# STR: implementation label compiled into the driver as a string literal.
STR_3MHW  := -DSTR=\"3mhw\"
STR_3M1   := -DSTR=\"3m1\"
STR_4MHW  := -DSTR=\"4mhw\"
STR_4M1B  := -DSTR=\"4m1b\"
STR_4M1A  := -DSTR=\"4m1a\"
STR_1M    := -DSTR=\"1m\"
STR_NAT   := -DSTR=\"asm\"
STR_OBL   := -DSTR=\"openblas\"
STR_MKL   := -DSTR=\"mkl\"
STR_ACML  := -DSTR=\"acml\"
STR_ARMPL := -DSTR=\"armpl\"

# THR_STR: single- or multi-threaded label.
STR_ST    := -DTHR_STR=\"st\"
STR_MT    := -DTHR_STR=\"mt\"

# Problem size sweep (begin / end / increment) for the single-threaded and
# multi-threaded drivers respectively.
PDEF_ST   := -DP_BEGIN=40 -DP_END=2000 -DP_INC=40
PDEF_MT   := -DP_BEGIN=200 -DP_END=10000 -DP_INC=200
#
# --- Targets/rules ------------------------------------------------------------
#

# Aggregate targets. all-st / all-mt build the BLIS, OpenBLAS and MKL drivers;
# the ARMPL drivers are not part of them and must be requested through
# armpl-st / armpl-mt.
all-st: blis-st openblas-st mkl-st
all-mt: blis-mt openblas-mt mkl-mt

# Per-library targets: every level-3 operation for one library and threading
# mode.
blis-st: blis-gemm-st blis-syrk-st blis-hemm-st blis-trmm-st
blis-mt: blis-gemm-mt blis-syrk-mt blis-hemm-mt blis-trmm-mt
openblas-st: openblas-gemm-st openblas-syrk-st openblas-hemm-st openblas-trmm-st
openblas-mt: openblas-gemm-mt openblas-syrk-mt openblas-hemm-mt openblas-trmm-mt
mkl-st: mkl-gemm-st mkl-syrk-st mkl-hemm-st mkl-trmm-st
mkl-mt: mkl-gemm-mt mkl-syrk-mt mkl-hemm-mt mkl-trmm-mt
armpl-st: armpl-gemm-st armpl-syrk-st armpl-hemm-st armpl-trmm-st
armpl-mt: armpl-gemm-mt armpl-syrk-mt armpl-hemm-mt armpl-trmm-mt

# 'all' is declared in .PHONY but previously had no rule, so 'make all' built
# nothing. It is defined after all-st on purpose so that it does not become
# the first target of this file.
all: all-st all-mt
# $(call test-drivers,<datatypes>,<operation>,<suffix>)
# Expands to one executable name per datatype letter, in the order given:
#   test_<dt><operation>_<suffix>.x
# Do not put spaces after the commas; they would become part of the argument.
test-drivers = $(foreach dt,$(1),test_$(dt)$(2)_$(3).x)

# BLIS drivers. gemm builds asm (s, d), 1m (c, z) and asm (c, z) variants;
# syrk/hemm/trmm build asm for the real and 1m for the complex datatypes.
blis-gemm-st: $(call test-drivers,s d,gemm,asm_blis_st) \
              $(call test-drivers,c z,gemm,1m_blis_st) \
              $(call test-drivers,c z,gemm,asm_blis_st)
blis-gemm-mt: $(call test-drivers,s d,gemm,asm_blis_mt) \
              $(call test-drivers,c z,gemm,1m_blis_mt) \
              $(call test-drivers,c z,gemm,asm_blis_mt)
blis-syrk-st: $(call test-drivers,s d,syrk,asm_blis_st) $(call test-drivers,c z,syrk,1m_blis_st)
blis-syrk-mt: $(call test-drivers,s d,syrk,asm_blis_mt) $(call test-drivers,c z,syrk,1m_blis_mt)
blis-hemm-st: $(call test-drivers,s d,hemm,asm_blis_st) $(call test-drivers,c z,hemm,1m_blis_st)
blis-hemm-mt: $(call test-drivers,s d,hemm,asm_blis_mt) $(call test-drivers,c z,hemm,1m_blis_mt)
blis-trmm-st: $(call test-drivers,s d,trmm,asm_blis_st) $(call test-drivers,c z,trmm,1m_blis_st)
blis-trmm-mt: $(call test-drivers,s d,trmm,asm_blis_mt) $(call test-drivers,c z,trmm,1m_blis_mt)

# OpenBLAS drivers: all four datatypes for each operation.
openblas-gemm-st: $(call test-drivers,s d c z,gemm,openblas_st)
openblas-gemm-mt: $(call test-drivers,s d c z,gemm,openblas_mt)
openblas-syrk-st: $(call test-drivers,s d c z,syrk,openblas_st)
openblas-syrk-mt: $(call test-drivers,s d c z,syrk,openblas_mt)
openblas-hemm-st: $(call test-drivers,s d c z,hemm,openblas_st)
openblas-hemm-mt: $(call test-drivers,s d c z,hemm,openblas_mt)
openblas-trmm-st: $(call test-drivers,s d c z,trmm,openblas_st)
openblas-trmm-mt: $(call test-drivers,s d c z,trmm,openblas_mt)

# MKL drivers: all four datatypes for each operation.
mkl-gemm-st: $(call test-drivers,s d c z,gemm,mkl_st)
mkl-gemm-mt: $(call test-drivers,s d c z,gemm,mkl_mt)
mkl-syrk-st: $(call test-drivers,s d c z,syrk,mkl_st)
mkl-syrk-mt: $(call test-drivers,s d c z,syrk,mkl_mt)
mkl-hemm-st: $(call test-drivers,s d c z,hemm,mkl_st)
mkl-hemm-mt: $(call test-drivers,s d c z,hemm,mkl_mt)
mkl-trmm-st: $(call test-drivers,s d c z,trmm,mkl_st)
mkl-trmm-mt: $(call test-drivers,s d c z,trmm,mkl_mt)

# ARMPL drivers: all four datatypes for each operation.
armpl-gemm-st: $(call test-drivers,s d c z,gemm,armpl_st)
armpl-gemm-mt: $(call test-drivers,s d c z,gemm,armpl_mt)
armpl-syrk-st: $(call test-drivers,s d c z,syrk,armpl_st)
armpl-syrk-mt: $(call test-drivers,s d c z,syrk,armpl_mt)
armpl-hemm-st: $(call test-drivers,s d c z,hemm,armpl_st)
armpl-hemm-mt: $(call test-drivers,s d c z,hemm,armpl_mt)
armpl-trmm-st: $(call test-drivers,s d c z,trmm,armpl_st)
armpl-trmm-mt: $(call test-drivers,s d c z,trmm,armpl_mt)
# -- Object file rules --

# Generic rule: compile a local .c file into an object of the same base name
# using only the common CFLAGS. (The recipe line must begin with a TAB.)
$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c
	$(CC) $(CFLAGS) -c $< -o $@
# BLIS induced-method object rules.
#
# Each rule builds test_<dt><op>_<method>_blis_<thr>.o from test_<op>.c, where
# the pattern stem '%' is the operation name. The datatype, library, method,
# label strings and problem-size sweep are all selected through -D flags.
# Every recipe line begins with a TAB, as make requires.

# blis 3mhw
test_z%_3mhw_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_ST) -c $< -o $@
test_c%_3mhw_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_ST) -c $< -o $@
test_z%_3mhw_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_MT) -c $< -o $@
test_c%_3mhw_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_MT) -c $< -o $@

# blis 3m1
test_z%_3m1_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@
test_c%_3m1_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@
test_z%_3m1_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o $@
test_c%_3m1_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o $@

# blis 4mhw
test_z%_4mhw_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@
test_c%_4mhw_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@
test_z%_4mhw_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@
test_c%_4mhw_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@

# blis 4m1b
test_z%_4m1b_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_ST) -c $< -o $@
test_c%_4m1b_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_ST) -c $< -o $@
test_z%_4m1b_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_MT) -c $< -o $@
test_c%_4m1b_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_MT) -c $< -o $@

# blis 4m1a
test_z%_4m1a_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_ST) -c $< -o $@
test_c%_4m1a_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_ST) -c $< -o $@
test_z%_4m1a_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_MT) -c $< -o $@
test_c%_4m1a_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_MT) -c $< -o $@

# blis 1m
test_z%_1m_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_ST) -c $< -o $@
test_c%_1m_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_ST) -c $< -o $@
test_z%_1m_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_MT) -c $< -o $@
test_c%_1m_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_MT) -c $< -o $@
# blis asm
# Native BLIS objects: test_<dt><op>_asm_blis_<thr>.o is built from
# test_<op>.c with -DBLIS, the native (BLIS_NAT) implementation type and the
# "asm" label. Every recipe line begins with a TAB, as make requires.
test_d%_asm_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@
test_s%_asm_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@
test_z%_asm_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@
test_c%_asm_blis_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@
test_d%_asm_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@
test_s%_asm_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@
test_z%_asm_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@
test_c%_asm_blis_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@
# Reference-BLAS object rules: test_<dt><op>_<lib>_<thr>.o is built from
# test_<op>.c with -DBLAS and the label of the library it will be linked
# against. Every recipe line begins with a TAB, as make requires.

# openblas
test_d%_openblas_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@
test_s%_openblas_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@
test_z%_openblas_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@
test_c%_openblas_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@
test_d%_openblas_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
test_s%_openblas_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
test_z%_openblas_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
test_c%_openblas_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@

# mkl
test_d%_mkl_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@
test_s%_mkl_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@
test_z%_mkl_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@
test_c%_mkl_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@
test_d%_mkl_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
test_s%_mkl_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
test_z%_mkl_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
test_c%_mkl_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@

# armpl
test_d%_armpl_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@
test_s%_armpl_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@
test_z%_armpl_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@
test_c%_armpl_st.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@
test_d%_armpl_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_MT) -c $< -o $@
test_s%_armpl_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_MT) -c $< -o $@
test_z%_armpl_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_MT) -c $< -o $@
test_c%_armpl_mt.o: test_%.c
	$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_MT) -c $< -o $@
# -- Executable file rules --

# Each driver executable is linked from the single object of the same base
# name. Every recipe line begins with a TAB, as make requires.
#
# NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS
# on the link command line in case BLIS was configured with the BLAS
# compatibility layer. This prevents BLIS from inadvertently getting called
# for the BLAS routines we are trying to test with.

test_%_openblas_st.x: test_%_openblas_st.o $(LIBBLIS_LINK)
	$(LINKER) $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@

test_%_openblas_mt.x: test_%_openblas_mt.o $(LIBBLIS_LINK)
	$(LINKER) $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@

test_%_mkl_st.x: test_%_mkl_st.o $(LIBBLIS_LINK)
	$(LINKER) $< $(MKL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@

test_%_mkl_mt.x: test_%_mkl_mt.o $(LIBBLIS_LINK)
	$(LINKER) $< $(MKLP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@

test_%_armpl_st.x: test_%_armpl_st.o $(LIBBLIS_LINK)
	$(LINKER) $< $(ARMPL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@

test_%_armpl_mt.x: test_%_armpl_mt.o $(LIBBLIS_LINK)
	$(LINKER) $< $(ARMPLP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@

# The BLIS drivers link against libblis only.
test_%_blis_st.x: test_%_blis_st.o $(LIBBLIS_LINK)
	$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@

test_%_blis_mt.x: test_%_blis_mt.o $(LIBBLIS_LINK)
	$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
# -- Clean rules --

# 'clean' simply delegates to 'cleanx'.
clean: cleanx

# Remove every object file and driver executable in this directory. The
# leading '-' lets make continue even if the removal command fails. The
# recipe line begins with a TAB, as make requires.
cleanx:
	- $(RM_F) *.o *.x

View File

@@ -0,0 +1,153 @@
% Multi-threaded GEMM curves: first column of the 4x4 grid, one row per
% datatype (s, d, c, z). Each axes is put in 'hold on' mode so that the
% curves of the different libraries accumulate in the same panel.
axes1 = subplot(4, 4, 1);
axes2 = subplot(4, 4, 5);
axes3 = subplot(4, 4, 9);
axes4 = subplot(4, 4, 13);
hold(axes1, 'on');
hold(axes2, 'on');
hold(axes3, 'on');
hold(axes4, 'on');

% --- BLIS (solid blue) -----------------------------------------------------
% Each output_* script is looked up on the path added here; after it runs,
% the data_* matrix of the same name is plotted (column 1 on the x axis,
% column 4 on the y axis). The target axes is passed to plot directly.
addpath(pathname_blis)
if plot_s
    output_mt_sgemm_asm_blis
    plot(axes1, data_mt_sgemm_asm_blis(:,1), data_mt_sgemm_asm_blis(:,4), 'LineWidth', 1.25, 'Color', [0 0 1]);
end
if plot_d
    output_mt_dgemm_asm_blis
    plot(axes2, data_mt_dgemm_asm_blis(:,1), data_mt_dgemm_asm_blis(:,4), 'LineWidth', 1.25, 'Color', [0 0 1]);
end
if plot_c
    output_mt_cgemm_1m_blis
    plot(axes3, data_mt_cgemm_1m_blis(:,1), data_mt_cgemm_1m_blis(:,4), 'LineWidth', 1.25, 'Color', [0 0 1]);
end
if plot_z
    output_mt_zgemm_1m_blis
    plot(axes4, data_mt_zgemm_1m_blis(:,1), data_mt_zgemm_1m_blis(:,4), 'LineWidth', 1.25, 'Color', [0 0 1]);
end
% Drop every workspace variable whose name contains "gemm" before switching
% to the next library's result directory.
clear *gemm*
rmpath(pathname_blis)

% --- OpenBLAS (solid green) ------------------------------------------------
addpath(pathname_openblas)
if plot_s
    output_mt_sgemm_openblas
    plot(axes1, data_mt_sgemm_openblas(:,1), data_mt_sgemm_openblas(:,4), 'LineWidth', 1.25, 'Color', [0 1 0]);
end
if plot_d
    output_mt_dgemm_openblas
    plot(axes2, data_mt_dgemm_openblas(:,1), data_mt_dgemm_openblas(:,4), 'LineWidth', 1.25, 'Color', [0 1 0]);
end
if plot_c
    output_mt_cgemm_openblas
    plot(axes3, data_mt_cgemm_openblas(:,1), data_mt_cgemm_openblas(:,4), 'LineWidth', 1.25, 'Color', [0 1 0]);
end
if plot_z
    output_mt_zgemm_openblas
    plot(axes4, data_mt_zgemm_openblas(:,1), data_mt_zgemm_openblas(:,4), 'LineWidth', 1.25, 'Color', [0 1 0]);
end
clear *gemm*
rmpath(pathname_openblas)

% --- ARMPL (dashed magenta) ------------------------------------------------
addpath(pathname_armpl)
if plot_s
    output_mt_sgemm_armpl
    plot(axes1, data_mt_sgemm_armpl(:,1), data_mt_sgemm_armpl(:,4), '--', 'LineWidth', 1.25, 'Color', [1 0 1]);
end
if plot_d
    output_mt_dgemm_armpl
    plot(axes2, data_mt_dgemm_armpl(:,1), data_mt_dgemm_armpl(:,4), '--', 'LineWidth', 1.25, 'Color', [1 0 1]);
end
if plot_c
    output_mt_cgemm_armpl
    plot(axes3, data_mt_cgemm_armpl(:,1), data_mt_cgemm_armpl(:,4), '--', 'LineWidth', 1.25, 'Color', [1 0 1]);
end
if plot_z
    output_mt_zgemm_armpl
    plot(axes4, data_mt_zgemm_armpl(:,1), data_mt_zgemm_armpl(:,4), '--', 'LineWidth', 1.25, 'Color', [1 0 1]);
end
clear *gemm*
rmpath(pathname_armpl)
% Label and scale the four multi-threaded GEMM panels. All panels get the
% same y label, font settings and box; only the bottom panel (ZGEMM) gets an
% x label. The y range is capped at the single- or double-precision peak
% multiplied by the core count. Legends are currently disabled.
mt_panels = [axes1 axes2 axes3 axes4];
mt_titles = {'SGEMM (multi-threaded)', 'DGEMM (multi-threaded)', ...
             'CGEMM (multi-threaded)', 'ZGEMM (multi-threaded)'};
mt_ymax   = [speak dpeak speak dpeak] * numcores;
for mt_k = 1:4
    axes(mt_panels(mt_k));
    ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
    if mt_k == 4
        xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' );
    end
    title(mt_titles{mt_k}, 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
    box(mt_panels(mt_k), 'on');
    set(mt_panels(mt_k), 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
    v = axis; % current ranges (kept in the workspace; not used for the limits below)
    axis( [ 0 xmax_mt 0 mt_ymax(mt_k) ] )
end
% Remove the loop helpers so only the variables the script always defined
% remain in the workspace.
clear mt_panels mt_titles mt_ymax mt_k

View File

@@ -0,0 +1,101 @@
% Single-threaded GEMM panels: first column of the 4x4 grid, one row per
% datatype (s, d, c, z).

% Run the BLIS and OpenBLAS result scripts found under 'pathname'; each one
% is expected to leave a data_* matrix of the same name in the workspace.
addpath(pathname)
output_st_sgemm_asm_blis
output_st_dgemm_asm_blis
output_st_cgemm_1m_blis
output_st_zgemm_1m_blis
output_st_sgemm_openblas
output_st_dgemm_openblas
output_st_cgemm_openblas
output_st_zgemm_openblas

% Per-panel inputs, in s/d/c/z order.
st_blis   = {data_st_sgemm_asm_blis, data_st_dgemm_asm_blis, ...
             data_st_cgemm_1m_blis,  data_st_zgemm_1m_blis};
st_open   = {data_st_sgemm_openblas, data_st_dgemm_openblas, ...
             data_st_cgemm_openblas, data_st_zgemm_openblas};
st_titles = {'SGEMM (single-threaded)', 'DGEMM (single-threaded)', ...
             'CGEMM (single-threaded)', 'ZGEMM (single-threaded)'};
st_slots  = [1 5 9 13];
st_ymax   = [speak dpeak speak dpeak];

for st_k = 1:4
    axes1 = subplot(4, 4, st_slots(st_k));
    hold(axes1,'on');
    % BLIS in blue, OpenBLAS in green; x = column 1, y = column 4.
    plot(st_blis{st_k}(:,1), st_blis{st_k}(:,4), 'LineWidth', 1.25,'Color', [0 0 1]);
    plot(st_open{st_k}(:,1), st_open{st_k}(:,4), 'LineWidth', 1.25,'Color', [0 1 0]);
    ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
    if st_k == 4
        % Only the bottom panel carries the x label.
        xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' );
    end
    title(st_titles{st_k},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
    box(axes1,'on');
    set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
    % Keep the automatic x upper bound, but pin y to [0, peak].
    v = axis;
    axis( [ 0 v(2) 0 st_ymax(st_k) ] )
end
clear st_blis st_open st_titles st_slots st_ymax st_k
clear *gemm*
rmpath(pathname)

% Overlay the ARMPL results (dashed magenta) on the same four panels.
% subplot() on an existing slot makes that axes current again; its hold
% state was switched on above, so the new curve is added to the panel.
addpath(pathname_armpl)
output_st_sgemm_armpl
output_st_dgemm_armpl
output_st_cgemm_armpl
output_st_zgemm_armpl
subplot(4, 4, 1);
plot(data_st_sgemm_armpl(:,1), data_st_sgemm_armpl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
subplot(4, 4, 5);
plot(data_st_dgemm_armpl(:,1), data_st_dgemm_armpl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
subplot(4, 4, 9);
plot(data_st_cgemm_armpl(:,1), data_st_cgemm_armpl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
subplot(4, 4, 13);
plot(data_st_zgemm_armpl(:,1), data_st_zgemm_armpl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
clear *gemm*
rmpath(pathname_armpl)

View File

@@ -0,0 +1,146 @@
% Multi-threaded HEMM/SYMM curves: third column of the 4x4 grid, one row per
% datatype (s, d, c, z). Each axes is put in 'hold on' mode so that the
% curves of the different libraries accumulate in the same panel.
axes3  = subplot(4, 4, 3);
axes7  = subplot(4, 4, 7);
axes11 = subplot(4, 4, 11);
axes15 = subplot(4, 4, 15);
hold(axes3, 'on');
hold(axes7, 'on');
hold(axes11, 'on');
hold(axes15, 'on');

% --- BLIS (solid blue) -----------------------------------------------------
% Each output_* script is looked up on the path added here; after it runs,
% the data_* matrix of the same name is plotted (column 1 on the x axis,
% column 3 on the y axis). The target axes is passed to plot directly.
addpath(pathname_blis)
if plot_s
    output_mt_shemm_asm_blis
    plot(axes3, data_mt_shemm_asm_blis(:,1), data_mt_shemm_asm_blis(:,3), 'LineWidth', 1.25, 'Color', [0 0 1]);
end
if plot_d
    output_mt_dhemm_asm_blis
    plot(axes7, data_mt_dhemm_asm_blis(:,1), data_mt_dhemm_asm_blis(:,3), 'LineWidth', 1.25, 'Color', [0 0 1]);
end
if plot_c
    output_mt_chemm_1m_blis
    plot(axes11, data_mt_chemm_1m_blis(:,1), data_mt_chemm_1m_blis(:,3), 'LineWidth', 1.25, 'Color', [0 0 1]);
end
if plot_z
    output_mt_zhemm_1m_blis
    plot(axes15, data_mt_zhemm_1m_blis(:,1), data_mt_zhemm_1m_blis(:,3), 'LineWidth', 1.25, 'Color', [0 0 1]);
end
% Drop every workspace variable whose name contains "hemm" before switching
% to the next library's result directory.
clear *hemm*
rmpath(pathname_blis)

% --- OpenBLAS (solid green) ------------------------------------------------
addpath(pathname_openblas)
if plot_s
    output_mt_shemm_openblas
    plot(axes3, data_mt_shemm_openblas(:,1), data_mt_shemm_openblas(:,3), 'LineWidth', 1.25, 'Color', [0 1 0]);
end
if plot_d
    output_mt_dhemm_openblas
    plot(axes7, data_mt_dhemm_openblas(:,1), data_mt_dhemm_openblas(:,3), 'LineWidth', 1.25, 'Color', [0 1 0]);
end
if plot_c
    output_mt_chemm_openblas
    plot(axes11, data_mt_chemm_openblas(:,1), data_mt_chemm_openblas(:,3), 'LineWidth', 1.25, 'Color', [0 1 0]);
end
if plot_z
    output_mt_zhemm_openblas
    plot(axes15, data_mt_zhemm_openblas(:,1), data_mt_zhemm_openblas(:,3), 'LineWidth', 1.25, 'Color', [0 1 0]);
end
clear *hemm*
rmpath(pathname_openblas)

% --- ARMPL (dashed magenta) ------------------------------------------------
addpath(pathname_armpl);
if plot_s
    output_mt_shemm_armpl
    plot(axes3, data_mt_shemm_armpl(:,1), data_mt_shemm_armpl(:,3), '--', 'LineWidth', 1.25, 'Color', [1 0 1]);
end
if plot_d
    output_mt_dhemm_armpl
    plot(axes7, data_mt_dhemm_armpl(:,1), data_mt_dhemm_armpl(:,3), '--', 'LineWidth', 1.25, 'Color', [1 0 1]);
end
if plot_c
    output_mt_chemm_armpl
    plot(axes11, data_mt_chemm_armpl(:,1), data_mt_chemm_armpl(:,3), '--', 'LineWidth', 1.25, 'Color', [1 0 1]);
end
if plot_z
    output_mt_zhemm_armpl
    plot(axes15, data_mt_zhemm_armpl(:,1), data_mt_zhemm_armpl(:,3), '--', 'LineWidth', 1.25, 'Color', [1 0 1]);
end
clear *hemm*
rmpath(pathname_armpl)
% Label and scale the four multi-threaded panels (SSYMM, DSYMM, CHEMM,
% ZHEMM). All panels get the same y label, font settings and box; only the
% bottom panel (ZHEMM) gets an x label. The y range is capped at the single-
% or double-precision peak multiplied by the core count. Legends are
% currently disabled.
mt_panels = [axes3 axes7 axes11 axes15];
mt_titles = {'SSYMM (multi-threaded)', 'DSYMM (multi-threaded)', ...
             'CHEMM (multi-threaded)', 'ZHEMM (multi-threaded)'};
mt_ymax   = [speak dpeak speak dpeak] * numcores;
for mt_k = 1:4
    axes(mt_panels(mt_k));
    ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
    if mt_k == 4
        xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' );
    end
    title(mt_titles{mt_k}, 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
    box(mt_panels(mt_k), 'on');
    set(mt_panels(mt_k), 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
    v = axis; % current ranges (kept in the workspace; not used for the limits below)
    axis( [ 0 xmax_mt 0 mt_ymax(mt_k) ] )
end
% Remove the loop helpers so only the variables the script always defined
% remain in the workspace.
clear mt_panels mt_titles mt_ymax mt_k

View File

@@ -0,0 +1,98 @@
% Single-threaded HEMM/SYMM panels (subplots 3, 7, 11 and 15 of the 4x4 grid).
% Reads pathname, pathname_armpl, fontsize, speak and dpeak from the workspace.
% All temporaries carry 'hemm' in their name so that the existing
% 'clear *hemm*' statements remove them together with the loaded data.

% Load the BLIS and OpenBLAS result scripts from the first data directory.
addpath(pathname)
output_st_shemm_asm_blis
output_st_dhemm_asm_blis
output_st_chemm_1m_blis
output_st_zhemm_1m_blis
output_st_shemm_openblas
output_st_dhemm_openblas
output_st_chemm_openblas
output_st_zhemm_openblas

% One row per panel: subplot index, title, BLIS data, OpenBLAS data, y-axis limit.
hemm_panels = { ...
     3, 'SSYMM (single-threaded)', data_st_shemm_asm_blis, data_st_shemm_openblas, speak; ...
     7, 'DSYMM (single-threaded)', data_st_dhemm_asm_blis, data_st_dhemm_openblas, dpeak; ...
    11, 'CHEMM (single-threaded)', data_st_chemm_1m_blis,  data_st_chemm_openblas, speak; ...
    15, 'ZHEMM (single-threaded)', data_st_zhemm_1m_blis,  data_st_zhemm_openblas, dpeak };
hemm_font = { 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' };
hemm_count = size(hemm_panels, 1);

for hemm_row = 1:hemm_count
    axes1 = subplot(4, 4, hemm_panels{hemm_row, 1});
    hold(axes1, 'on');

    % Column 1 is the x value, column 3 the plotted performance value.
    hemm_blis = hemm_panels{hemm_row, 3};
    hemm_oblas = hemm_panels{hemm_row, 4};
    plot(hemm_blis(:,1), hemm_blis(:,3), 'LineWidth', 1.25, 'Color', [0 0 1]);
    plot(hemm_oblas(:,1), hemm_oblas(:,3), 'LineWidth', 1.25, 'Color', [0 1 0]);

    ylabel('GFLOPS', hemm_font{:});
    if hemm_row == hemm_count
        % Only the bottom panel carries the x-axis label.
        xlabel('matrix dimension m=n=k', hemm_font{:});
    end
    title(hemm_panels{hemm_row, 2}, hemm_font{:});
    box(axes1, 'on');
    set(axes1, hemm_font{:});

    % Keep the automatic x upper limit, pin y to [0, peak].
    v = axis;
    axis([0 v(2) 0 hemm_panels{hemm_row, 5}])
end

clear *hemm*
rmpath(pathname)

% Overlay the ARMPL results (dashed magenta) from the second data directory.
addpath(pathname_armpl)
output_st_shemm_armpl
output_st_dhemm_armpl
output_st_chemm_armpl
output_st_zhemm_armpl

hemm_armpl = { ...
     3, data_st_shemm_armpl; ...
     7, data_st_dhemm_armpl; ...
    11, data_st_chemm_armpl; ...
    15, data_st_zhemm_armpl };
for hemm_row = 1:size(hemm_armpl, 1)
    % Re-select the existing panel; hold is still on from the first pass.
    subplot(4, 4, hemm_armpl{hemm_row, 1});
    hemm_data = hemm_armpl{hemm_row, 2};
    plot(hemm_data(:,1), hemm_data(:,3), '--', 'LineWidth', 1.25, 'Color', [1 0 1]);
end

clear *hemm*
rmpath(pathname_armpl)

View File

@@ -0,0 +1,145 @@
% Multi-threaded SYRK panels: create the four axes of the second column of the
% 4x4 grid up front and turn hold on, so the per-library sections below can
% add one curve each to the same axes.
axes2 = subplot(4, 4, 2);
hold(axes2,'on');
axes6 = subplot(4, 4, 6);
hold(axes6,'on');
axes10 = subplot(4, 4, 10);
hold(axes10,'on');
axes14 = subplot(4, 4, 14);
hold(axes14,'on');
% BLIS
% The output_* result scripts are looked up via pathname_blis; each one is
% presumably what defines the data_* matrix plotted right after it.
% plot_s/plot_d/plot_c/plot_z select which of the four panels get a curve.
addpath(pathname_blis)
if(plot_s)
axes(axes2);
output_mt_ssyrk_asm_blis
plot(data_mt_ssyrk_asm_blis(:,1), data_mt_ssyrk_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]);
end
if(plot_d)
axes(axes6);
output_mt_dsyrk_asm_blis
plot(data_mt_dsyrk_asm_blis(:,1), data_mt_dsyrk_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]);
end
if(plot_c)
axes(axes10);
output_mt_csyrk_1m_blis
plot(data_mt_csyrk_1m_blis(:,1), data_mt_csyrk_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]);
end
if(plot_z)
axes(axes14);
output_mt_zsyrk_1m_blis
plot(data_mt_zsyrk_1m_blis(:,1), data_mt_zsyrk_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]);
end
% Drop every workspace variable whose name contains 'syrk' (the loaded data)
% and take the BLIS directory off the path before the next library is loaded.
clear *syrk*
rmpath(pathname_blis)
% OpenBLAS
addpath(pathname_openblas)
if(plot_s)
axes(axes2);
output_mt_ssyrk_openblas
plot(data_mt_ssyrk_openblas(:,1), data_mt_ssyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]);
end
if(plot_d)
axes(axes6);
output_mt_dsyrk_openblas
plot(data_mt_dsyrk_openblas(:,1), data_mt_dsyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]);
end
if(plot_c)
axes(axes10);
output_mt_csyrk_openblas
plot(data_mt_csyrk_openblas(:,1), data_mt_csyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]);
end
if(plot_z)
axes(axes14);
output_mt_zsyrk_openblas
plot(data_mt_zsyrk_openblas(:,1), data_mt_zsyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]);
end
clear *syrk*
rmpath(pathname_openblas)
% ARMPL
% Overlay the ARMPL curves (dashed magenta) on the four SYRK panels.
% The overlay honours the plot_armpl switch, matching the guard used in
% plot_trmm_mt_perf. When plot_armpl is not defined in the workspace the
% curves are still drawn, which keeps the previous unconditional behaviour.
if(~exist('plot_armpl', 'var') || plot_armpl)
    addpath(pathname_armpl)
    if(plot_s)
        axes(axes2);
        output_mt_ssyrk_armpl
        plot(data_mt_ssyrk_armpl(:,1), data_mt_ssyrk_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
    end
    if(plot_d)
        axes(axes6);
        output_mt_dsyrk_armpl
        plot(data_mt_dsyrk_armpl(:,1), data_mt_dsyrk_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
    end
    if(plot_c)
        axes(axes10);
        output_mt_csyrk_armpl
        plot(data_mt_csyrk_armpl(:,1), data_mt_csyrk_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
    end
    if(plot_z)
        axes(axes14);
        output_mt_zsyrk_armpl
        plot(data_mt_zsyrk_armpl(:,1), data_mt_zsyrk_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
    end
    % Remove the loaded data and take the ARMPL directory off the path again.
    clear *syrk*
    rmpath(pathname_armpl)
end
% Label and scale the four multi-threaded SYRK panels.
% One row per panel: axes handle, title, y-axis upper limit.
syrk_panels = { ...
    axes2,  'SSYRK (multi-threaded)', speak*numcores; ...
    axes6,  'DSYRK (multi-threaded)', dpeak*numcores; ...
    axes10, 'CSYRK (multi-threaded)', speak*numcores; ...
    axes14, 'ZSYRK (multi-threaded)', dpeak*numcores };
syrk_font = { 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' };
syrk_count = size(syrk_panels, 1);

for syrk_row = 1:syrk_count
    syrk_ax = syrk_panels{syrk_row, 1};

    % Make the panel current so ylabel/xlabel/title/axis act on it.
    axes(syrk_ax);
    ylabel('GFLOPS', syrk_font{:});
    if syrk_row == syrk_count
        % Only the bottom panel carries the x-axis label.
        xlabel('matrix dimension m=n=k', syrk_font{:});
    end
    title(syrk_panels{syrk_row, 2}, syrk_font{:});
    box(syrk_ax, 'on');
    set(syrk_ax, syrk_font{:});

    % v is assigned as before (it stays in the workspace) but is not used here.
    v = axis;
    axis([0 xmax_mt 0 syrk_panels{syrk_row, 3}])
end

% Remove the loop temporaries so the workspace matches the unrolled version.
clear syrk_panels syrk_font syrk_count syrk_row syrk_ax

View File

@@ -0,0 +1,103 @@
% Single-threaded SYRK panels (subplots 2, 6, 10 and 14 of the 4x4 grid).
% Reads pathname, pathname_armpl, fontsize, speak and dpeak from the workspace.
% All temporaries carry 'syrk' in their name so that the existing
% 'clear *syrk*' statements remove them together with the loaded data.

% Load the BLIS and OpenBLAS result scripts from the first data directory.
addpath(pathname)
output_st_ssyrk_asm_blis
output_st_dsyrk_asm_blis
output_st_csyrk_1m_blis
output_st_zsyrk_1m_blis
output_st_ssyrk_openblas
output_st_dsyrk_openblas
output_st_csyrk_openblas
output_st_zsyrk_openblas

% Not read anywhere in this script; kept because it stays in the workspace.
plot_lower = 0;

% One row per panel: subplot index, title, BLIS data, OpenBLAS data, y-axis limit.
syrk_panels = { ...
     2, 'SSYRK (single-threaded)', data_st_ssyrk_asm_blis, data_st_ssyrk_openblas, speak; ...
     6, 'DSYRK (single-threaded)', data_st_dsyrk_asm_blis, data_st_dsyrk_openblas, dpeak; ...
    10, 'CSYRK (single-threaded)', data_st_csyrk_1m_blis,  data_st_csyrk_openblas, speak; ...
    14, 'ZSYRK (single-threaded)', data_st_zsyrk_1m_blis,  data_st_zsyrk_openblas, dpeak };
syrk_font = { 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' };
syrk_count = size(syrk_panels, 1);

for syrk_row = 1:syrk_count
    axes1 = subplot(4, 4, syrk_panels{syrk_row, 1});
    hold(axes1, 'on');

    % Column 1 is the x value, column 3 the plotted performance value.
    syrk_blis = syrk_panels{syrk_row, 3};
    syrk_oblas = syrk_panels{syrk_row, 4};
    plot(syrk_blis(:,1), syrk_blis(:,3), 'LineWidth', 1.25, 'Color', [0 0 1]);
    plot(syrk_oblas(:,1), syrk_oblas(:,3), 'LineWidth', 1.25, 'Color', [0 1 0]);

    ylabel('GFLOPS', syrk_font{:});
    if syrk_row == syrk_count
        % Only the bottom panel carries the x-axis label.
        xlabel('matrix dimension m=n=k', syrk_font{:});
    end
    title(syrk_panels{syrk_row, 2}, syrk_font{:});
    box(axes1, 'on');
    set(axes1, syrk_font{:});

    % Keep the automatic x upper limit, pin y to [0, peak].
    v = axis;
    axis([0 v(2) 0 syrk_panels{syrk_row, 5}])
end

clear *syrk*
rmpath(pathname)

% Overlay the ARMPL results (dashed magenta) from the second data directory.
addpath(pathname_armpl)
output_st_ssyrk_armpl
output_st_dsyrk_armpl
output_st_csyrk_armpl
output_st_zsyrk_armpl

syrk_armpl = { ...
     2, data_st_ssyrk_armpl; ...
     6, data_st_dsyrk_armpl; ...
    10, data_st_csyrk_armpl; ...
    14, data_st_zsyrk_armpl };
for syrk_row = 1:size(syrk_armpl, 1)
    % Re-select the existing panel; hold is still on from the first pass.
    subplot(4, 4, syrk_armpl{syrk_row, 1});
    syrk_data = syrk_armpl{syrk_row, 2};
    plot(syrk_data(:,1), syrk_data(:,3), '--', 'LineWidth', 1.25, 'Color', [1 0 1]);
end

clear *syrk*
rmpath(pathname_armpl)

View File

@@ -0,0 +1,113 @@
% Driver script: sets the shared plotting parameters, then produces up to
% three figures by running the per-operation plot scripts, and prints each
% figure to a PDF file.

% Which figures to produce: figure 1 (single-threaded), figure 2 (28 cores,
% data under '1socket') and figure 3 (56 cores, data under '2sockets').
plot_st = 1;
plot_1s = 1;
plot_2s = 1;
% Per-datatype switches; the multi-threaded plot scripts test these before
% loading and plotting the s/d/c/z curves.
plot_s = 1;
plot_d = 1;
plot_c = 1;
plot_z = 1;
% Switch for the ARMPL overlay (tested by plot_trmm_mt_perf).
plot_armpl = 1;
fontsize = 6;
% NOTE(review): freq is presumably the clock rate in GHz and *flopspercycle
% the per-core flops per cycle, which would make speak/dpeak per-core peak
% GFLOPS -- confirm. The plot scripts use speak/dpeak (times numcores for the
% multi-threaded figures) as the upper y-axis limit.
freq = 2;
sflopspercycle = 16;
dflopspercycle = 8;
speak = sflopspercycle*freq;
dpeak = dflopspercycle*freq;
% Upper x-axis limit used by the multi-threaded panels.
xmax_mt = 5000;
% Figure 1: single-threaded results.
if(plot_st)
numcores = 1;
fig1 = figure(1);
clf(fig1)
%
% BLIS/OpenBLAS data and ARMPL data are read from two different directories.
pathname = './20180824/';
pathname_armpl = './20180829/';
plot_gemm_st_perf
plot_syrk_st_perf
plot_hemm_st_perf
plot_trmm_st_perf
%fig1.PaperPositionMode = 'auto';
% Print the figure in landscape, filling the whole page.
orient(fig1,'landscape')
set(fig1,'PaperUnits','normalized');
set(fig1,'PaperPosition', [0 0 1 1]);
print(fig1, 'thunderx2-st-20180829', '-dpdf')
clear pathname pathname_armpl
end
% Figure 2: multi-threaded results, 28 cores.
if (plot_1s)
fig1 = figure(2);
clf;
numcores = 28;
% All three libraries read from the same directory here.
pathname_blis = './20180830/1socket';
pathname_armpl = './20180830/1socket';
pathname_openblas = './20180830/1socket';
%JC = 2, IC = 14
plot_gemm_mt_perf
plot_syrk_mt_perf
plot_hemm_mt_perf
plot_trmm_mt_perf
%fig1.PaperPositionMode = 'auto';
orient(fig1,'landscape')
set(fig1,'PaperUnits','normalized');
set(fig1,'PaperPosition', [0 0 1 1]);
print(fig1, 'thunderx2-mt-28cores-20180830', '-dpdf')
end
% Figure 3: multi-threaded results, 56 cores.
if(plot_2s)
numcores = 56;
%JC = 4, IC = 14
fig1 = figure(3);
clf;
% Per-operation switches, used only in this section. They are (re)assigned
% on every run; note that the plot scripts' 'clear *syrk*' / 'clear *trmm*'
% statements also match plot_syrk / plot_trmm, but only after the
% corresponding if-test below has already been evaluated.
plot_gemm = 1;
plot_syrk = 1;
plot_hemm = 1;
plot_trmm = 1;
plot_s = 1;
plot_d = 1;
plot_c = 1;
plot_z = 1;
pathname_blis = './20180830/2sockets';
pathname_openblas = './20180830/2sockets';
pathname_armpl = './20180830/2sockets';
if(plot_gemm)
plot_gemm_mt_perf
end
if(plot_syrk)
plot_syrk_mt_perf
end
if(plot_hemm)
plot_hemm_mt_perf
end
if(plot_trmm)
plot_trmm_mt_perf
end
%fig1.PaperPositionMode = 'auto';
orient(fig1,'landscape')
set(fig1,'PaperUnits','normalized');
set(fig1,'PaperPosition', [0 0 1 1]);
print(fig1, 'thunderx2-mt-56cores-20180830', '-dpdf')
end

View File

@@ -0,0 +1,145 @@
% Multi-threaded TRMM panels: create the four axes of the fourth column of the
% 4x4 grid up front and turn hold on, so the per-library sections below can
% add one curve each to the same axes.
axes4 = subplot(4, 4, 4);
hold(axes4,'on');
axes8 = subplot(4, 4, 8);
hold(axes8,'on');
axes12 = subplot(4, 4, 12);
hold(axes12,'on');
axes16 = subplot(4, 4, 16);
hold(axes16,'on');
% BLIS
% The output_* result scripts are looked up via pathname_blis; each one is
% presumably what defines the data_* matrix plotted right after it.
% plot_s/plot_d/plot_c/plot_z select which of the four panels get a curve.
addpath(pathname_blis)
if(plot_s)
axes(axes4);
output_mt_strmm_asm_blis
plot(data_mt_strmm_asm_blis(:,1), data_mt_strmm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]);
end
if(plot_d)
axes(axes8);
output_mt_dtrmm_asm_blis
plot(data_mt_dtrmm_asm_blis(:,1), data_mt_dtrmm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]);
end
if(plot_c)
axes(axes12);
output_mt_ctrmm_1m_blis
plot(data_mt_ctrmm_1m_blis(:,1), data_mt_ctrmm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]);
end
if(plot_z)
axes(axes16);
output_mt_ztrmm_1m_blis
plot(data_mt_ztrmm_1m_blis(:,1), data_mt_ztrmm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]);
end
% Drop every workspace variable whose name contains 'trmm' (the loaded data)
% and take the BLIS directory off the path before the next library is loaded.
clear *trmm*
rmpath(pathname_blis)
% OpenBLAS
addpath(pathname_openblas)
if(plot_s)
axes(axes4);
output_mt_strmm_openblas
plot(data_mt_strmm_openblas(:,1), data_mt_strmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]);
end
if(plot_d)
axes(axes8);
output_mt_dtrmm_openblas
plot(data_mt_dtrmm_openblas(:,1), data_mt_dtrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]);
end
if(plot_c)
axes(axes12);
output_mt_ctrmm_openblas
plot(data_mt_ctrmm_openblas(:,1), data_mt_ctrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]);
end
if(plot_z)
axes(axes16);
output_mt_ztrmm_openblas
plot(data_mt_ztrmm_openblas(:,1), data_mt_ztrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]);
end
clear *trmm*
rmpath(pathname_openblas)
% ARMPL (dashed magenta); the whole overlay is skipped when plot_armpl is 0.
if(plot_armpl)
addpath(pathname_armpl)
if(plot_s)
axes(axes4);
output_mt_strmm_armpl
plot(data_mt_strmm_armpl(:,1), data_mt_strmm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
end
if(plot_d)
axes(axes8);
output_mt_dtrmm_armpl
plot(data_mt_dtrmm_armpl(:,1), data_mt_dtrmm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
end
if(plot_c)
axes(axes12);
output_mt_ctrmm_armpl
plot(data_mt_ctrmm_armpl(:,1), data_mt_ctrmm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
end
if(plot_z)
axes(axes16);
output_mt_ztrmm_armpl
plot(data_mt_ztrmm_armpl(:,1), data_mt_ztrmm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]);
end
clear *trmm*
rmpath(pathname_armpl)
end
% Label and scale the four multi-threaded TRMM panels and attach the legend
% to the last one. Each panel is made current first so that ylabel/xlabel/
% title/axis/legend act on it. The x range is fixed to [0, xmax_mt]; the y
% range is [0, speak*numcores] for the s/c panels and [0, dpeak*numcores]
% for the d/z panels.

% STRMM multi threaded
axes(axes4);
ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
title('STRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
box(axes4,'on');
set(axes4,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
axis( [ 0 xmax_mt 0 speak*numcores ] )

% DTRMM multi threaded
axes(axes8);
ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
title('DTRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
box(axes8,'on');
set(axes8,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
axis( [ 0 xmax_mt 0 dpeak*numcores ] )

% CTRMM multi threaded
axes(axes12);
ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
title('CTRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
box(axes12,'on');
set(axes12,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
axis( [ 0 xmax_mt 0 speak*numcores ] )

% ZTRMM multi threaded (the only panel with an x-axis label and a legend)
axes(axes16);
ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' );
title('ZTRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
box(axes16,'on');
set(axes16,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue');
axis( [ 0 xmax_mt 0 dpeak*numcores ] )

% Create the legend once, with entries that match the curves actually drawn:
% the ARMPL curve only exists when plot_armpl is set, so listing 'ARMPL'
% unconditionally would name a line that is not in the axes.
if(plot_armpl)
    legend({'BLIS','OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South');
else
    legend({'BLIS','OpenBLAS'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South');
end

View File

@@ -0,0 +1,101 @@
% Single-threaded TRMM panels (subplots 4, 8, 12 and 16 of the 4x4 grid).
% Reads pathname, pathname_armpl, fontsize, speak and dpeak from the workspace.
% All temporaries carry 'trmm' in their name so that the existing
% 'clear *trmm*' statements remove them together with the loaded data.

% Load the BLIS and OpenBLAS result scripts from the first data directory.
addpath(pathname)
output_st_strmm_asm_blis
output_st_dtrmm_asm_blis
output_st_ctrmm_1m_blis
output_st_ztrmm_1m_blis
output_st_strmm_openblas
output_st_dtrmm_openblas
output_st_ctrmm_openblas
output_st_ztrmm_openblas

% One row per panel: subplot index, title, BLIS data, OpenBLAS data, y-axis limit.
trmm_panels = { ...
     4, 'STRMM (single-threaded)', data_st_strmm_asm_blis, data_st_strmm_openblas, speak; ...
     8, 'DTRMM (single-threaded)', data_st_dtrmm_asm_blis, data_st_dtrmm_openblas, dpeak; ...
    12, 'CTRMM (single-threaded)', data_st_ctrmm_1m_blis,  data_st_ctrmm_openblas, speak; ...
    16, 'ZTRMM (single-threaded)', data_st_ztrmm_1m_blis,  data_st_ztrmm_openblas, dpeak };
trmm_font = { 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' };
trmm_count = size(trmm_panels, 1);

for trmm_row = 1:trmm_count
    axes1 = subplot(4, 4, trmm_panels{trmm_row, 1});
    hold(axes1, 'on');

    % Column 1 is the x value, column 3 the plotted performance value.
    trmm_blis = trmm_panels{trmm_row, 3};
    trmm_oblas = trmm_panels{trmm_row, 4};
    plot(trmm_blis(:,1), trmm_blis(:,3), 'LineWidth', 1.25, 'Color', [0 0 1]);
    plot(trmm_oblas(:,1), trmm_oblas(:,3), 'LineWidth', 1.25, 'Color', [0 1 0]);

    ylabel('GFLOPS', trmm_font{:});
    if trmm_row == trmm_count
        % Only the bottom panel carries the x-axis label.
        xlabel('matrix dimension m=n=k', trmm_font{:});
    end
    title(trmm_panels{trmm_row, 2}, trmm_font{:});
    box(axes1, 'on');
    set(axes1, trmm_font{:});

    % Keep the automatic x upper limit, pin y to [0, peak].
    v = axis;
    axis([0 v(2) 0 trmm_panels{trmm_row, 5}])
end

clear *trmm*
rmpath(pathname)

% Overlay the ARMPL results (dashed magenta) from the second data directory.
addpath(pathname_armpl)
output_st_strmm_armpl
output_st_dtrmm_armpl
output_st_ctrmm_armpl
output_st_ztrmm_armpl

trmm_armpl = { ...
     4, data_st_strmm_armpl; ...
     8, data_st_dtrmm_armpl; ...
    12, data_st_ctrmm_armpl; ...
    16, data_st_ztrmm_armpl };
for trmm_row = 1:size(trmm_armpl, 1)
    % Re-select the existing panel; hold is still on from the first pass.
    subplot(4, 4, trmm_armpl{trmm_row, 1});
    trmm_data = trmm_armpl{trmm_row, 2};
    plot(trmm_data(:,1), trmm_data(:,3), '--', 'LineWidth', 1.25, 'Color', [1 0 1]);
end

% The last panel selected above (subplot 16) is current, so it gets the legend.
trmm_font = { 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' };
legend({'BLIS','OpenBLAS', 'ARMPL'}, trmm_font{:}, 'Location', 'South');

clear *trmm*
rmpath(pathname_armpl)

205
test/studies/thunderx2/runme.sh Executable file
View File

@@ -0,0 +1,205 @@
#!/bin/bash
# Configuration for the benchmark runs below: naming of executables and
# output files, threading parameters, and the datatype / operation /
# implementation / core-count lists that the two loop nests iterate over.

# File prefixes.
# exec_root and out_root are the leading parts of the executable and output
# file names; results are written under a directory named after today's date.
exec_root="test"
out_root="output"
out_rootdir=$(date +%Y%m%d)
#out_rootdir=20180830
mkdir -p $out_rootdir
# Selects which of the "blis" / "thunderx2" branches below is taken.
sys="thunderx2"
# Bind threads to processors.
# Affinity starts out unset; the loops export it again for some runs.
#export OMP_PROC_BIND=true
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55"
unset GOMP_CPU_AFFINITY
# Threading scheme to use when multithreading
# For thunderx2 there are two sets: *_1_* is exported for the 28-core runs
# and *_2_* for the 56-core runs.
if [ ${sys} = "blis" ]; then
jc_nt=1 # 5th loop
ic_nt=4 # 3rd loop
jr_nt=1 # 2nd loop
ir_nt=1 # 1st loop
nt=4
elif [ ${sys} = "thunderx2" ]; then
jc_1_nt=2 # 5th loop
ic_1_nt=14 # 3rd loop
jr_1_nt=1 # 2nd loop
ir_1_nt=1 # 1st loop
nt_1=28
jc_2_nt=4 # 5th loop
ic_2_nt=14 # 3rd loop
jr_2_nt=1 # 2nd loop
ir_2_nt=1 # 1st loop
nt_2=56
fi
# Threadedness to test.
# (All disabled; the loops derive "st"/"mt" from the core count instead.)
#threads="mt1 mt2"
#threads_r="mt"
#threads="st"
#threads_r="st"
# Datatypes to test.
# dts is used by the complex loop nest, dts_r by the real one.
dts="c z"
dts_r="s d"
# Operations to test.
#l3_ops="gemm syrk hemm trmm"
l3_ops="gemm"
test_ops="${l3_ops}"
test_ops_r="${l3_ops}"
# Complex domain implementations to test.
if [ ${sys} = "blis" ]; then
#test_impls="openblas mkl 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis"
test_impls="openblas 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis 1m_blis"
elif [ ${sys} = "thunderx2" ]; then
#test_impls="openblas"
#test_impls="armpl"
#test_impls="1m_blis armpl"
test_impls="openblas armpl 1m_blis"
fi
# Real domain implementations to test.
test_impls_r="openblas armpl asm_blis"
#test_impls_r="openblas"
#test_impls_r="asm_blis"
#test_impls_r="armpl"
# Core counts to test: 1 selects the single-threaded runs, 28 and 56 the
# multi-threaded ones (cores_r for the real loop, cores for the complex loop).
cores_r="1 28 56"
cores="1 28 56"
# First perform real test cases.
# For every (core count, datatype, implementation, operation) combination:
# set up the threading environment, pick the output directory, then run the
# matching test executable with its stdout redirected to a .m result file.
for nc in ${cores_r}; do
  for dt in ${dts_r}; do
    for im in ${test_impls_r}; do
      for op in ${test_ops_r}; do

        # Set the threading environment according to the core count.
        if [ "${nc}" -gt 1 ]; then

          # OpenBLAS and ARMPL run without an explicit affinity list; every
          # other implementation (the BLIS builds) is pinned to cores 0-55.
          if [ "${im}" = "openblas" ] || [ "${im}" = "armpl" ]; then
            unset GOMP_CPU_AFFINITY
          else
            export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55"
          fi

          if [ "${nc}" -eq 28 ]; then
            export BLIS_JC_NT=${jc_1_nt}
            export BLIS_IC_NT=${ic_1_nt}
            export BLIS_JR_NT=${jr_1_nt}
            export BLIS_IR_NT=${ir_1_nt}
            export OMP_NUM_THREADS=${nt_1}
            out_dir="${out_rootdir}/1socket"
          elif [ "${nc}" -eq 56 ]; then
            export BLIS_JC_NT=${jc_2_nt}
            export BLIS_IC_NT=${ic_2_nt}
            export BLIS_JR_NT=${jr_2_nt}
            export BLIS_IR_NT=${ir_2_nt}
            export OMP_NUM_THREADS=${nt_2}
            out_dir="${out_rootdir}/2sockets"
          else
            # No threading scheme is defined for this core count; running
            # anyway would reuse the previous iteration's settings and
            # output directory.
            echo "Skipping unsupported core count: ${nc}" >&2
            continue
          fi

          th="mt"

        else

          # Single-threaded run: remove any per-loop thread counts and
          # affinity list exported by an earlier multi-threaded iteration,
          # so they cannot leak into this run.
          unset BLIS_JC_NT BLIS_IC_NT BLIS_JR_NT BLIS_IR_NT
          unset GOMP_CPU_AFFINITY
          export BLIS_NUM_THREADS=1
          export OMP_NUM_THREADS=1
          out_dir="${out_rootdir}/st"
          th="st"

        fi

        mkdir -p "${out_dir}"

        # Construct the name of the test executable.
        exec_name="${exec_root}_${dt}${op}_${im}_${th}.x"

        # Construct the name of the output file.
        out_file="${out_dir}/${out_root}_${th}_${dt}${op}_${im}.m"

        echo "Running (nt = ${OMP_NUM_THREADS}) ./${exec_name} > ${out_file}"

        # Run executable.
        "./${exec_name}" > "${out_file}"

        sleep 1

      done
    done
  done
done
# Now perform complex test cases.
# For every (core count, datatype, implementation, operation) combination:
# set up the threading environment, pick the output directory, then run the
# matching test executable with its stdout redirected to a .m result file.
for nc in ${cores}; do
  for dt in ${dts}; do
    for im in ${test_impls}; do
      for op in ${test_ops}; do

        # Set the threading environment according to the core count.
        if [ "${nc}" -gt 1 ]; then

          # OpenBLAS and ARMPL run without an explicit affinity list; every
          # other implementation (the BLIS builds) is pinned to cores 0-55.
          if [ "${im}" = "openblas" ] || [ "${im}" = "armpl" ]; then
            unset GOMP_CPU_AFFINITY
          else
            export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55"
          fi

          if [ "${nc}" -eq 28 ]; then
            export BLIS_JC_NT=${jc_1_nt}
            export BLIS_IC_NT=${ic_1_nt}
            export BLIS_JR_NT=${jr_1_nt}
            export BLIS_IR_NT=${ir_1_nt}
            export OMP_NUM_THREADS=${nt_1}
            out_dir="${out_rootdir}/1socket"
          elif [ "${nc}" -eq 56 ]; then
            export BLIS_JC_NT=${jc_2_nt}
            export BLIS_IC_NT=${ic_2_nt}
            export BLIS_JR_NT=${jr_2_nt}
            export BLIS_IR_NT=${ir_2_nt}
            export OMP_NUM_THREADS=${nt_2}
            out_dir="${out_rootdir}/2sockets"
          else
            # No threading scheme is defined for this core count; running
            # anyway would reuse the previous iteration's settings and
            # output directory.
            echo "Skipping unsupported core count: ${nc}" >&2
            continue
          fi

          th="mt"

        else

          # Single-threaded run: remove the per-loop thread counts and the
          # affinity list that a preceding multi-threaded run (for example
          # the last iteration of the real-domain loop) left exported, so
          # they cannot leak into this run.
          unset BLIS_JC_NT BLIS_IC_NT BLIS_JR_NT BLIS_IR_NT
          unset GOMP_CPU_AFFINITY
          export BLIS_NUM_THREADS=1
          export OMP_NUM_THREADS=1
          out_dir="${out_rootdir}/st"
          th="st"

        fi

        # Create the output directory here as well, so this loop does not
        # depend on another loop having created it first.
        mkdir -p "${out_dir}"

        # Construct the name of the test executable.
        exec_name="${exec_root}_${dt}${op}_${im}_${th}.x"

        # Construct the name of the output file.
        out_file="${out_dir}/${out_root}_${th}_${dt}${op}_${im}.m"

        echo "Running (nt = ${OMP_NUM_THREADS}) ./${exec_name} > ${out_file}"

        # Run executable.
        "./${exec_name}" > "${out_file}"

        sleep 1

      done
    done
  done
done

View File

@@ -0,0 +1,339 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis.h"
void zgemm3m_( f77_char*, f77_char*, f77_int*, f77_int*, f77_int*, dcomplex*, dcomplex*, f77_int*, dcomplex*, f77_int*, dcomplex*, dcomplex*, f77_int* );
//#define PRINT
int main( int argc, char** argv )
{
obj_t a, b, c;
obj_t c_save;
obj_t alpha, beta;
dim_t m, n, k;
dim_t p;
dim_t p_begin, p_end, p_inc;
int m_input, n_input, k_input;
ind_t ind;
num_t dt;
char dt_ch;
int r, n_repeats;
trans_t transa;
trans_t transb;
f77_char f77_transa;
f77_char f77_transb;
double dtime;
double dtime_save;
double gflops;
bli_init();
//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
n_repeats = 3;
dt = DT;
ind = IND;
p_begin = P_BEGIN;
p_end = P_END;
p_inc = P_INC;
m_input = -1;
n_input = -1;
k_input = -1;
// Supress compiler warnings about unused variable 'ind'.
( void )ind;
#if 0
cntx_t* cntx;
ind_t ind_mod = ind;
// A hack to use 3m1 as 1mpb (with 1m as 1mbp).
if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M;
// Initialize a context for the current induced method and datatype.
cntx = bli_gks_query_ind_cntx( ind_mod, dt );
// Set k to the kc blocksize for the current datatype.
k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx );
#elif 1
//k_input = 256;
#endif
// Choose the char corresponding to the requested datatype.
if ( bli_is_float( dt ) ) dt_ch = 's';
else if ( bli_is_double( dt ) ) dt_ch = 'd';
else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
else dt_ch = 'z';
transa = BLIS_NO_TRANSPOSE;
transb = BLIS_NO_TRANSPOSE;
bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
bli_param_map_blis_to_netlib_trans( transb, &f77_transb );
// Begin with initializing the last entry to zero so that
// matlab allocates space for the entire array once up-front.
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR );
#else
printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR );
#endif
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n",
( unsigned long )(p - p_begin + 1)/p_inc + 1,
( unsigned long )0,
( unsigned long )0,
( unsigned long )0, 0.0 );
for ( p = p_begin; p <= p_end; p += p_inc )
{
if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
else m = ( dim_t ) m_input;
if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
else n = ( dim_t ) n_input;
if ( k_input < 0 ) k = p / ( dim_t )abs(k_input);
else k = ( dim_t ) k_input;
bli_obj_create( dt, 1, 1, 0, 0, &alpha );
bli_obj_create( dt, 1, 1, 0, 0, &beta );
bli_obj_create( dt, m, k, 0, 0, &a );
bli_obj_create( dt, k, n, 0, 0, &b );
bli_obj_create( dt, m, n, 0, 0, &c );
//bli_obj_create( dt, m, k, 2, 2*m, &a );
//bli_obj_create( dt, k, n, 2, 2*k, &b );
//bli_obj_create( dt, m, n, 2, 2*m, &c );
bli_obj_create( dt, m, n, 0, 0, &c_save );
bli_randm( &a );
bli_randm( &b );
bli_randm( &c );
bli_obj_set_conjtrans( transa, &a );
bli_obj_set_conjtrans( transb, &b );
bli_setsc( (2.0/1.0), 0.0, &alpha );
bli_setsc( (1.0/1.0), 0.0, &beta );
bli_copym( &c, &c_save );
#ifdef BLIS
bli_ind_disable_all_dt( dt );
bli_ind_enable_dt( ind, dt );
#endif
dtime_save = DBL_MAX;
for ( r = 0; r < n_repeats; ++r )
{
bli_copym( &c_save, &c );
dtime = bli_clock();
#ifdef PRINT
bli_printm( "a", &a, "%4.1f", "" );
bli_printm( "b", &b, "%4.1f", "" );
bli_printm( "c", &c, "%4.1f", "" );
#endif
#ifdef BLIS
bli_gemm( &alpha,
&a,
&b,
&beta,
&c );
#else
if ( bli_is_float( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
float* alphap = bli_obj_buffer( &alpha );
float* ap = bli_obj_buffer( &a );
float* bp = bli_obj_buffer( &b );
float* betap = bli_obj_buffer( &beta );
float* cp = bli_obj_buffer( &c );
sgemm_( &f77_transa,
&f77_transb,
&mm,
&nn,
&kk,
alphap,
ap, &lda,
bp, &ldb,
betap,
cp, &ldc );
}
else if ( bli_is_double( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
double* alphap = bli_obj_buffer( &alpha );
double* ap = bli_obj_buffer( &a );
double* bp = bli_obj_buffer( &b );
double* betap = bli_obj_buffer( &beta );
double* cp = bli_obj_buffer( &c );
dgemm_( &f77_transa,
&f77_transb,
&mm,
&nn,
&kk,
alphap,
ap, &lda,
bp, &ldb,
betap,
cp, &ldc );
}
else if ( bli_is_scomplex( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
scomplex* alphap = bli_obj_buffer( &alpha );
scomplex* ap = bli_obj_buffer( &a );
scomplex* bp = bli_obj_buffer( &b );
scomplex* betap = bli_obj_buffer( &beta );
scomplex* cp = bli_obj_buffer( &c );
cgemm_( &f77_transa,
&f77_transb,
&mm,
&nn,
&kk,
alphap,
ap, &lda,
bp, &ldb,
betap,
cp, &ldc );
}
else if ( bli_is_dcomplex( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
dcomplex* alphap = bli_obj_buffer( &alpha );
dcomplex* ap = bli_obj_buffer( &a );
dcomplex* bp = bli_obj_buffer( &b );
dcomplex* betap = bli_obj_buffer( &beta );
dcomplex* cp = bli_obj_buffer( &c );
zgemm_( &f77_transa,
//zgemm3m_( &f77_transa,
&f77_transb,
&mm,
&nn,
&kk,
alphap,
ap, &lda,
bp, &ldb,
betap,
cp, &ldc );
}
#endif
#ifdef PRINT
bli_printm( "c after", &c, "%4.1f", "" );
exit(1);
#endif
dtime_save = bli_clock_min_diff( dtime_save, dtime );
}
gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 );
if ( bli_is_complex( dt ) ) gflops *= 4.0;
#ifdef BLIS
printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR );
#else
printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR );
#endif
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n",
( unsigned long )(p - p_begin + 1)/p_inc + 1,
( unsigned long )m,
( unsigned long )k,
( unsigned long )n, gflops );
bli_obj_free( &alpha );
bli_obj_free( &beta );
bli_obj_free( &a );
bli_obj_free( &b );
bli_obj_free( &c );
bli_obj_free( &c_save );
}
bli_finalize();
return 0;
}

View File

@@ -0,0 +1,332 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis.h"
// Prototype for an externally provided zgemm3m_ routine that takes
// f77_char/f77_int/dcomplex pointer arguments.
// NOTE(review): nothing in this hemm driver calls zgemm3m_; it looks like a
// leftover from the gemm driver -- confirm before removing.
void zgemm3m_( f77_char*, f77_char*, f77_int*, f77_int*, f77_int*, dcomplex*, dcomplex*, f77_int*, dcomplex*, f77_int*, dcomplex*, dcomplex*, f77_int* );
//#define PRINT
/*
 * Performance driver for the level-3 hemm operation.
 *
 * For every problem size p from P_BEGIN to P_END (in steps of P_INC) the
 * driver creates the operands, runs the operation n_repeats times, keeps
 * the time tracked through bli_clock_min_diff(), and prints one
 * matlab-style assignment containing m, n and the computed GFLOPS.
 *
 * DT, IND, P_BEGIN, P_END, P_INC, THR_STR and STR are not defined in this
 * file and must be provided at compile time (presumably as -D flags from
 * the build system -- confirm). When BLIS is defined the bli_hemm() path is
 * timed; otherwise the ssymm_/dsymm_/chemm_/zhemm_ symbols are called.
 * When PRINT is defined the operands are printed around the first call and
 * the program exits.
 *
 * argc/argv are unused. Returns 0.
 */
int main( int argc, char** argv )
{
    obj_t    a, b, c;
    obj_t    c_save;
    obj_t    alpha, beta;
    dim_t    m, n;
    dim_t    p;
    dim_t    p_begin, p_end, p_inc;
    int      m_input, n_input;
    ind_t    ind;
    num_t    dt;
    char     dt_ch;
    int      r, n_repeats;
    side_t   side;
    uplo_t   uploa;
    f77_char f77_side;
    f77_char f77_uploa;
    double   dtime;
    double   dtime_save;
    double   gflops;

    // NOTE(review): explicit bli_init()/bli_finalize() calls are disabled in
    // this driver; presumably the library self-initializes -- confirm
    // against the BLIS version in use.
    //bli_init();

    //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

    n_repeats = 3;

    dt      = DT;
    ind     = IND;

    p_begin = P_BEGIN;
    p_end   = P_END;
    p_inc   = P_INC;

    // A negative value means "use p / |value|" for that dimension; a
    // non-negative value fixes the dimension to that value.
    m_input = -1;
    n_input = -1;

    // Suppress compiler warnings about unused variable 'ind'.
    ( void )ind;

    // Choose the char corresponding to the requested datatype.
    if      ( bli_is_float( dt ) )    dt_ch = 's';
    else if ( bli_is_double( dt ) )   dt_ch = 'd';
    else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
    else                              dt_ch = 'z';

    side  = BLIS_LEFT;
    uploa = BLIS_LOWER;

    bli_param_map_blis_to_netlib_side( side, &f77_side );
    bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );

    // Begin with initializing the last entry to zero so that
    // matlab allocates space for the entire array once up-front.
    // The empty loop only advances p to the last problem size.
    for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
    printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR );
#else
    printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR );
#endif
    // Row index is 1-based: (p - p_begin)/p_inc + 1. The previous form,
    // (p - p_begin + 1)/p_inc + 1, skipped row 1 whenever p_inc == 1.
    printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
            ( unsigned long )( ( p - p_begin ) / p_inc + 1 ),
            ( unsigned long )0,
            ( unsigned long )0, 0.0 );

    for ( p = p_begin; p <= p_end; p += p_inc )
    {
        if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
        else               m =     ( dim_t )    m_input;
        if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
        else               n =     ( dim_t )    n_input;

        bli_obj_create( dt, 1, 1, 0, 0, &alpha );
        bli_obj_create( dt, 1, 1, 0, 0, &beta );

        // The structured operand is m x m when applied from the left and
        // n x n otherwise.
        if ( bli_is_left( side ) )
            bli_obj_create( dt, m, m, 0, 0, &a );
        else
            bli_obj_create( dt, n, n, 0, 0, &a );
        bli_obj_create( dt, m, n, 0, 0, &b );
        bli_obj_create( dt, m, n, 0, 0, &c );
        bli_obj_create( dt, m, n, 0, 0, &c_save );

        bli_randm( &a );
        bli_randm( &b );
        bli_randm( &c );

        bli_obj_set_struc( BLIS_HERMITIAN, &a );
        bli_obj_set_uplo( uploa, &a );

        bli_mkherm( &a );
        bli_mktrim( &a );

        bli_setsc( (2.0/1.0), 0.0, &alpha );
        bli_setsc( (1.0/1.0), 0.0, &beta );

        // Keep a pristine copy of c so every repeat starts from the same
        // input.
        bli_copym( &c, &c_save );

#ifdef BLIS
        bli_ind_disable_all_dt( dt );
        bli_ind_enable_dt( ind, dt );
#endif

        dtime_save = DBL_MAX;

        for ( r = 0; r < n_repeats; ++r )
        {
            bli_copym( &c_save, &c );

            dtime = bli_clock();

#ifdef PRINT
            bli_printm( "a", &a, "%4.1f", "" );
            bli_printm( "b", &b, "%4.1f", "" );
            bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS
            bli_hemm( side,
                      &alpha,
                      &a,
                      &b,
                      &beta,
                      &c );
#else
            if ( bli_is_float( dt ) )
            {
                f77_int mm     = bli_obj_length( &c );
                f77_int nn     = bli_obj_width( &c );
                f77_int lda    = bli_obj_col_stride( &a );
                f77_int ldb    = bli_obj_col_stride( &b );
                f77_int ldc    = bli_obj_col_stride( &c );
                float*  alphap = bli_obj_buffer( &alpha );
                float*  ap     = bli_obj_buffer( &a );
                float*  bp     = bli_obj_buffer( &b );
                float*  betap  = bli_obj_buffer( &beta );
                float*  cp     = bli_obj_buffer( &c );

                ssymm_( &f77_side,
                        &f77_uploa,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        bp, &ldb,
                        betap,
                        cp, &ldc );
            }
            else if ( bli_is_double( dt ) )
            {
                f77_int mm     = bli_obj_length( &c );
                f77_int nn     = bli_obj_width( &c );
                f77_int lda    = bli_obj_col_stride( &a );
                f77_int ldb    = bli_obj_col_stride( &b );
                f77_int ldc    = bli_obj_col_stride( &c );
                double* alphap = bli_obj_buffer( &alpha );
                double* ap     = bli_obj_buffer( &a );
                double* bp     = bli_obj_buffer( &b );
                double* betap  = bli_obj_buffer( &beta );
                double* cp     = bli_obj_buffer( &c );

                dsymm_( &f77_side,
                        &f77_uploa,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        bp, &ldb,
                        betap,
                        cp, &ldc );
            }
            else if ( bli_is_scomplex( dt ) )
            {
                f77_int   mm     = bli_obj_length( &c );
                f77_int   nn     = bli_obj_width( &c );
                f77_int   lda    = bli_obj_col_stride( &a );
                f77_int   ldb    = bli_obj_col_stride( &b );
                f77_int   ldc    = bli_obj_col_stride( &c );
                scomplex* alphap = bli_obj_buffer( &alpha );
                scomplex* ap     = bli_obj_buffer( &a );
                scomplex* bp     = bli_obj_buffer( &b );
                scomplex* betap  = bli_obj_buffer( &beta );
                scomplex* cp     = bli_obj_buffer( &c );

                chemm_( &f77_side,
                        &f77_uploa,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        bp, &ldb,
                        betap,
                        cp, &ldc );
            }
            else if ( bli_is_dcomplex( dt ) )
            {
                f77_int   mm     = bli_obj_length( &c );
                f77_int   nn     = bli_obj_width( &c );
                f77_int   lda    = bli_obj_col_stride( &a );
                f77_int   ldb    = bli_obj_col_stride( &b );
                f77_int   ldc    = bli_obj_col_stride( &c );
                dcomplex* alphap = bli_obj_buffer( &alpha );
                dcomplex* ap     = bli_obj_buffer( &a );
                dcomplex* bp     = bli_obj_buffer( &b );
                dcomplex* betap  = bli_obj_buffer( &beta );
                dcomplex* cp     = bli_obj_buffer( &c );

                zhemm_( &f77_side,
                        &f77_uploa,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        bp, &ldb,
                        betap,
                        cp, &ldc );
            }
#endif

#ifdef PRINT
            bli_printm( "c after", &c, "%4.1f", "" );
            exit(1);
#endif

            dtime_save = bli_clock_min_diff( dtime_save, dtime );
        }

        if ( bli_is_left( side ) )
            gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 );
        else
            gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 );

        // Complex datatypes are credited with four times the flop count.
        if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
        printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR );
#else
        printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR );
#endif
        printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
                ( unsigned long )( ( p - p_begin ) / p_inc + 1 ),
                ( unsigned long )m,
                ( unsigned long )n, gflops );

        bli_obj_free( &alpha );
        bli_obj_free( &beta );

        bli_obj_free( &a );
        bli_obj_free( &b );
        bli_obj_free( &c );
        bli_obj_free( &c_save );
    }

    //bli_finalize();

    return 0;
}

View File

@@ -0,0 +1,310 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis.h"
//#define PRINT
/*
 * Performance driver for the level-3 syrk operation.
 *
 * For every problem size p from P_BEGIN to P_END (in steps of P_INC) the
 * driver creates the operands, runs the operation n_repeats times, keeps
 * the time tracked through bli_clock_min_diff(), and prints one
 * matlab-style assignment containing m, k and the computed GFLOPS.
 *
 * DT, IND, P_BEGIN, P_END, P_INC, THR_STR and STR are not defined in this
 * file and must be provided at compile time (presumably as -D flags from
 * the build system -- confirm). When BLIS is defined the bli_syrk() path is
 * timed; otherwise the ssyrk_/dsyrk_/csyrk_/zsyrk_ symbols are called.
 * When PRINT is defined the operands are printed around the first call and
 * the program exits.
 *
 * argc/argv are unused. Returns 0.
 */
int main( int argc, char** argv )
{
    obj_t    a, c;
    obj_t    c_save;
    obj_t    alpha, beta;
    dim_t    m, k;
    dim_t    p;
    dim_t    p_begin, p_end, p_inc;
    int      m_input, k_input;
    ind_t    ind;
    num_t    dt;
    char     dt_ch;
    int      r, n_repeats;
    trans_t  transa;
    uplo_t   uploc;
    f77_char f77_transa;
    f77_char f77_uploc;
    double   dtime;
    double   dtime_save;
    double   gflops;

    // NOTE(review): explicit bli_init()/bli_finalize() calls are disabled in
    // this driver; presumably the library self-initializes -- confirm
    // against the BLIS version in use.
    //bli_init();

    //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

    n_repeats = 3;

    dt      = DT;
    ind     = IND;

    p_begin = P_BEGIN;
    p_end   = P_END;
    p_inc   = P_INC;

    // A negative value means "use p / |value|" for that dimension; a
    // non-negative value fixes the dimension to that value.
    m_input = -1;
    k_input = -1;

    // Suppress compiler warnings about unused variable 'ind'.
    ( void )ind;

    // Disabled alternative: pin k to the KC blocksize of the selected
    // context instead of scaling it with p.
#if 0
    cntx_t* cntx;

    ind_t ind_mod = ind;

    // A hack to use 3m1 as 1mpb (with 1m as 1mbp).
    if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M;

    // Initialize a context for the current induced method and datatype.
    cntx = bli_gks_query_ind_cntx( ind_mod, dt );

    // Set k to the kc blocksize for the current datatype.
    k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx );

#elif 1

    //k_input = 256;

#endif

    // Choose the char corresponding to the requested datatype.
    if      ( bli_is_float( dt ) )    dt_ch = 's';
    else if ( bli_is_double( dt ) )   dt_ch = 'd';
    else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
    else                              dt_ch = 'z';

    transa = BLIS_NO_TRANSPOSE;
    uploc  = BLIS_LOWER;

    bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
    bli_param_map_blis_to_netlib_uplo ( uploc,  &f77_uploc );

    // Begin with initializing the last entry to zero so that
    // matlab allocates space for the entire array once up-front.
    // The empty loop only advances p to the last problem size.
    for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
    printf( "data_%s_%csyrk_%s_blis", THR_STR, dt_ch, STR );
#else
    printf( "data_%s_%csyrk_%s", THR_STR, dt_ch, STR );
#endif
    // Row index is 1-based: (p - p_begin)/p_inc + 1. The previous form,
    // (p - p_begin + 1)/p_inc + 1, skipped row 1 whenever p_inc == 1.
    printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
            ( unsigned long )( ( p - p_begin ) / p_inc + 1 ),
            ( unsigned long )0,
            ( unsigned long )0, 0.0 );

    for ( p = p_begin; p <= p_end; p += p_inc )
    {
        if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
        else               m =     ( dim_t )    m_input;
        if ( k_input < 0 ) k = p / ( dim_t )abs(k_input);
        else               k =     ( dim_t )    k_input;

        bli_obj_create( dt, 1, 1, 0, 0, &alpha );
        bli_obj_create( dt, 1, 1, 0, 0, &beta );

        bli_obj_create( dt, m, k, 0, 0, &a );
        bli_obj_create( dt, m, m, 0, 0, &c );
        //bli_obj_create( dt, m, k, 2, 2*m, &a );
        bli_obj_create( dt, m, m, 0, 0, &c_save );

        bli_randm( &a );

        // Mark c as symmetric with the requested stored triangle before it
        // is filled with random values.
        bli_obj_set_struc( BLIS_SYMMETRIC, &c );
        bli_obj_set_uplo ( uploc, &c );
        bli_randm( &c );

        bli_obj_set_conjtrans( transa, &a );

        bli_setsc( (2.0/1.0), 0.0, &alpha );
        bli_setsc( (1.0/1.0), 0.0, &beta );

        // Keep a pristine copy of c so every repeat starts from the same
        // input.
        bli_copym( &c, &c_save );

#ifdef BLIS
        bli_ind_disable_all_dt( dt );
        bli_ind_enable_dt( ind, dt );
#endif

        dtime_save = DBL_MAX;

        for ( r = 0; r < n_repeats; ++r )
        {
            bli_copym( &c_save, &c );

            dtime = bli_clock();

#ifdef PRINT
            bli_printm( "a", &a, "%4.1f", "" );
            bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS
            bli_syrk( &alpha,
                      &a,
                      &beta,
                      &c );
#else
            if ( bli_is_float( dt ) )
            {
                f77_int mm     = bli_obj_length( &c );
                f77_int kk     = bli_obj_width_after_trans( &a );
                f77_int lda    = bli_obj_col_stride( &a );
                f77_int ldc    = bli_obj_col_stride( &c );
                float*  alphap = bli_obj_buffer( &alpha );
                float*  ap     = bli_obj_buffer( &a );
                float*  betap  = bli_obj_buffer( &beta );
                float*  cp     = bli_obj_buffer( &c );

                ssyrk_( &f77_uploc,
                        &f77_transa,
                        &mm,
                        &kk,
                        alphap,
                        ap, &lda,
                        betap,
                        cp, &ldc );
            }
            else if ( bli_is_double( dt ) )
            {
                f77_int mm     = bli_obj_length( &c );
                f77_int kk     = bli_obj_width_after_trans( &a );
                f77_int lda    = bli_obj_col_stride( &a );
                f77_int ldc    = bli_obj_col_stride( &c );
                double* alphap = bli_obj_buffer( &alpha );
                double* ap     = bli_obj_buffer( &a );
                double* betap  = bli_obj_buffer( &beta );
                double* cp     = bli_obj_buffer( &c );

                dsyrk_( &f77_uploc,
                        &f77_transa,
                        &mm,
                        &kk,
                        alphap,
                        ap, &lda,
                        betap,
                        cp, &ldc );
            }
            else if ( bli_is_scomplex( dt ) )
            {
                f77_int   mm     = bli_obj_length( &c );
                f77_int   kk     = bli_obj_width_after_trans( &a );
                f77_int   lda    = bli_obj_col_stride( &a );
                f77_int   ldc    = bli_obj_col_stride( &c );
                scomplex* alphap = bli_obj_buffer( &alpha );
                scomplex* ap     = bli_obj_buffer( &a );
                scomplex* betap  = bli_obj_buffer( &beta );
                scomplex* cp     = bli_obj_buffer( &c );

                csyrk_( &f77_uploc,
                        &f77_transa,
                        &mm,
                        &kk,
                        alphap,
                        ap, &lda,
                        betap,
                        cp, &ldc );
            }
            else if ( bli_is_dcomplex( dt ) )
            {
                f77_int   mm     = bli_obj_length( &c );
                f77_int   kk     = bli_obj_width_after_trans( &a );
                f77_int   lda    = bli_obj_col_stride( &a );
                f77_int   ldc    = bli_obj_col_stride( &c );
                dcomplex* alphap = bli_obj_buffer( &alpha );
                dcomplex* ap     = bli_obj_buffer( &a );
                dcomplex* betap  = bli_obj_buffer( &beta );
                dcomplex* cp     = bli_obj_buffer( &c );

                zsyrk_( &f77_uploc,
                        &f77_transa,
                        &mm,
                        &kk,
                        alphap,
                        ap, &lda,
                        betap,
                        cp, &ldc );
            }
#endif

#ifdef PRINT
            bli_printm( "c after", &c, "%4.1f", "" );
            exit(1);
#endif

            dtime_save = bli_clock_min_diff( dtime_save, dtime );
        }

        gflops = ( 1.0 * m * m * k ) / ( dtime_save * 1.0e9 );

        // Complex datatypes are credited with four times the flop count.
        if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
        printf( "data_%s_%csyrk_%s_blis", THR_STR, dt_ch, STR );
#else
        printf( "data_%s_%csyrk_%s", THR_STR, dt_ch, STR );
#endif
        printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
                ( unsigned long )( ( p - p_begin ) / p_inc + 1 ),
                ( unsigned long )m,
                ( unsigned long )k, gflops );

        bli_obj_free( &alpha );
        bli_obj_free( &beta );

        bli_obj_free( &a );
        bli_obj_free( &c );
        bli_obj_free( &c_save );
    }

    //bli_finalize();

    return 0;
}

View File

@@ -0,0 +1,316 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis.h"
//#define PRINT
/*
 * Performance driver for the level-3 trmm operation.
 *
 * For every problem size p from P_BEGIN to P_END (in steps of P_INC) the
 * driver creates the operands, runs the operation n_repeats times, keeps
 * the time tracked through bli_clock_min_diff(), and prints one
 * matlab-style assignment containing m, n and the computed GFLOPS.
 *
 * DT, IND, P_BEGIN, P_END, P_INC, THR_STR and STR are not defined in this
 * file and must be provided at compile time (presumably as -D flags from
 * the build system -- confirm). When BLIS is defined the bli_trmm() path is
 * timed; otherwise the strmm_/dtrmm_/ctrmm_/ztrmm_ symbols are called.
 * When PRINT is defined the operands are printed around the first call and
 * the program exits.
 *
 * argc/argv are unused. Returns 0.
 */
int main( int argc, char** argv )
{
    obj_t    a, c;
    obj_t    c_save;
    obj_t    alpha;
    dim_t    m, n;
    dim_t    p;
    dim_t    p_begin, p_end, p_inc;
    int      m_input, n_input;
    ind_t    ind;
    num_t    dt;
    char     dt_ch;
    int      r, n_repeats;
    side_t   side;
    uplo_t   uploa;
    trans_t  transa;
    diag_t   diaga;
    f77_char f77_side;
    f77_char f77_uploa;
    f77_char f77_transa;
    f77_char f77_diaga;
    double   dtime;
    double   dtime_save;
    double   gflops;

    // NOTE(review): explicit bli_init()/bli_finalize() calls are disabled in
    // this driver; presumably the library self-initializes -- confirm
    // against the BLIS version in use.
    //bli_init();

    //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

    n_repeats = 3;

    dt      = DT;
    ind     = IND;

    p_begin = P_BEGIN;
    p_end   = P_END;
    p_inc   = P_INC;

    // A negative value means "use p / |value|" for that dimension; a
    // non-negative value fixes the dimension to that value.
    m_input = -1;
    n_input = -1;

    // Suppress compiler warnings about unused variable 'ind'.
    ( void )ind;

    // Choose the char corresponding to the requested datatype.
    if      ( bli_is_float( dt ) )    dt_ch = 's';
    else if ( bli_is_double( dt ) )   dt_ch = 'd';
    else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
    else                              dt_ch = 'z';

    side   = BLIS_LEFT;
    uploa  = BLIS_LOWER;
    transa = BLIS_NO_TRANSPOSE;
    diaga  = BLIS_NONUNIT_DIAG;

    bli_param_map_blis_to_netlib_side( side, &f77_side );
    bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );
    bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
    bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga );

    // Begin with initializing the last entry to zero so that
    // matlab allocates space for the entire array once up-front.
    // The empty loop only advances p to the last problem size.
    for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
    printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
#else
    printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR );
#endif
    // Row index is 1-based: (p - p_begin)/p_inc + 1. The previous form,
    // (p - p_begin + 1)/p_inc + 1, skipped row 1 whenever p_inc == 1.
    printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
            ( unsigned long )( ( p - p_begin ) / p_inc + 1 ),
            ( unsigned long )0,
            ( unsigned long )0, 0.0 );

    for ( p = p_begin; p <= p_end; p += p_inc )
    {
        if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
        else               m =     ( dim_t )    m_input;
        if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
        else               n =     ( dim_t )    n_input;

        bli_obj_create( dt, 1, 1, 0, 0, &alpha );

        // The triangular operand is m x m when applied from the left and
        // n x n otherwise.
        if ( bli_is_left( side ) )
            bli_obj_create( dt, m, m, 0, 0, &a );
        else
            bli_obj_create( dt, n, n, 0, 0, &a );
        bli_obj_create( dt, m, n, 0, 0, &c );
        bli_obj_create( dt, m, n, 0, 0, &c_save );

        bli_randm( &a );
        bli_randm( &c );

        bli_obj_set_struc( BLIS_TRIANGULAR, &a );
        bli_obj_set_uplo( uploa, &a );
        bli_obj_set_conjtrans( transa, &a );
        bli_obj_set_diag( diaga, &a );

        bli_setsc( (2.0/1.0), 0.0, &alpha );

        // Keep a pristine copy of c so every repeat starts from the same
        // input (trmm overwrites c in place).
        bli_copym( &c, &c_save );

#ifdef BLIS
        bli_ind_disable_all_dt( dt );
        bli_ind_enable_dt( ind, dt );
#endif

        dtime_save = DBL_MAX;

        for ( r = 0; r < n_repeats; ++r )
        {
            bli_copym( &c_save, &c );

            dtime = bli_clock();

#ifdef PRINT
            // Only a and c exist in this driver; the former print of an
            // undeclared 'b' broke compilation when PRINT was defined.
            bli_printm( "a", &a, "%4.1f", "" );
            bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS
            bli_trmm( side,
                      &alpha,
                      &a,
                      &c );
#else
            if ( bli_is_float( dt ) )
            {
                f77_int mm     = bli_obj_length( &c );
                f77_int nn     = bli_obj_width( &c );
                f77_int lda    = bli_obj_col_stride( &a );
                f77_int ldc    = bli_obj_col_stride( &c );
                float*  alphap = bli_obj_buffer( &alpha );
                float*  ap     = bli_obj_buffer( &a );
                float*  cp     = bli_obj_buffer( &c );

                strmm_( &f77_side,
                        &f77_uploa,
                        &f77_transa,
                        &f77_diaga,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        cp, &ldc );
            }
            else if ( bli_is_double( dt ) )
            {
                f77_int mm     = bli_obj_length( &c );
                f77_int nn     = bli_obj_width( &c );
                f77_int lda    = bli_obj_col_stride( &a );
                f77_int ldc    = bli_obj_col_stride( &c );
                double* alphap = bli_obj_buffer( &alpha );
                double* ap     = bli_obj_buffer( &a );
                double* cp     = bli_obj_buffer( &c );

                dtrmm_( &f77_side,
                        &f77_uploa,
                        &f77_transa,
                        &f77_diaga,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        cp, &ldc );
            }
            else if ( bli_is_scomplex( dt ) )
            {
                f77_int   mm     = bli_obj_length( &c );
                f77_int   nn     = bli_obj_width( &c );
                f77_int   lda    = bli_obj_col_stride( &a );
                f77_int   ldc    = bli_obj_col_stride( &c );
                scomplex* alphap = bli_obj_buffer( &alpha );
                scomplex* ap     = bli_obj_buffer( &a );
                scomplex* cp     = bli_obj_buffer( &c );

                ctrmm_( &f77_side,
                        &f77_uploa,
                        &f77_transa,
                        &f77_diaga,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        cp, &ldc );
            }
            else if ( bli_is_dcomplex( dt ) )
            {
                f77_int   mm     = bli_obj_length( &c );
                f77_int   nn     = bli_obj_width( &c );
                f77_int   lda    = bli_obj_col_stride( &a );
                f77_int   ldc    = bli_obj_col_stride( &c );
                dcomplex* alphap = bli_obj_buffer( &alpha );
                dcomplex* ap     = bli_obj_buffer( &a );
                dcomplex* cp     = bli_obj_buffer( &c );

                ztrmm_( &f77_side,
                        &f77_uploa,
                        &f77_transa,
                        &f77_diaga,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        cp, &ldc );
            }
#endif

#ifdef PRINT
            bli_printm( "c after", &c, "%4.1f", "" );
            exit(1);
#endif

            dtime_save = bli_clock_min_diff( dtime_save, dtime );
        }

        if ( bli_is_left( side ) )
            gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
        else
            gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );

        // Complex datatypes are credited with four times the flop count.
        if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
        printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
#else
        printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR );
#endif
        printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
                ( unsigned long )( ( p - p_begin ) / p_inc + 1 ),
                ( unsigned long )m,
                ( unsigned long )n, gflops );

        bli_obj_free( &alpha );

        bli_obj_free( &a );
        bli_obj_free( &c );
        bli_obj_free( &c_save );
    }

    //bli_finalize();

    return 0;
}