mirror of
https://github.com/amd/blis.git
synced 2026-03-21 17:57:23 +00:00
Overhauled test/3m4m Makefile and scripts.
Details: - Rewrote much of Makefile to generate executables for single- and dual-socket multithreading as well as single-threaded execution. Each of the three can also use a different problem size range/increment, as is often appropriate when doubling/halving the number of threads. - Rewrote runme.sh script to flexibly execute as many threading parameter scenarios as are given in the input parameter string (currently set within the script itself). The string also encodes the maximum problem size for each threading scenario, which is used to identify the executable to run. Also improved the "progress" output of the script to reduce redundant info and improve readability in terminals that are not especially wide. - Minor updates to test_*.c source files. - Updated matlab scripts according to changes made to the Makefile, test drivers, and runme.sh script, and renamed 'plot_all.m' to 'runme.m'.
This commit is contained in:
@@ -46,8 +46,6 @@
|
||||
#
|
||||
|
||||
.PHONY: all \
|
||||
blis-gemm-st openblas-gemm-st mkl-gemm-st acml-gemm-st \
|
||||
blis-gemm-mt openblas-gemm-mt mkl-gemm-mt acml-gemm-mt \
|
||||
clean cleanx
|
||||
|
||||
|
||||
@@ -94,21 +92,19 @@ endif
|
||||
#BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a
|
||||
|
||||
# BLAS library path(s). This is where the BLAS libraries reside.
|
||||
HOME_LIB_PATH := $(HOME)/flame/lib
|
||||
#MKL_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64
|
||||
MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64
|
||||
#MKL_LIB_PATH := ${MKLROOT}/lib/intel64
|
||||
HOME_LIB_PATH := $(HOME)/flame/lib
|
||||
#VENDOR_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64
|
||||
MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64
|
||||
#VENDOR_LIB_PATH := ${MKLROOT}/lib/intel64
|
||||
#ICC_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64
|
||||
ACML_LIB_PATH := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_int64/lib
|
||||
ACMLP_LIB_PATH := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_mp_int64/lib
|
||||
|
||||
# OpenBLAS
|
||||
OPENBLAS_LIB := $(HOME_LIB_PATH)/libopenblas.a
|
||||
OPENBLASP_LIB := $(HOME_LIB_PATH)/libopenblasp.a
|
||||
|
||||
# ATLAS
|
||||
ATLAS_LIB := $(HOME_LIB_PATH)/libf77blas.a \
|
||||
$(HOME_LIB_PATH)/libatlas.a
|
||||
#ATLAS_LIB := $(HOME_LIB_PATH)/libf77blas.a \
|
||||
# $(HOME_LIB_PATH)/libatlas.a
|
||||
|
||||
# MKL
|
||||
MKL_LIB := -L$(MKL_LIB_PATH) \
|
||||
@@ -130,13 +126,29 @@ MKLP_LIB := -L$(MKL_LIB_PATH) \
|
||||
#-L$(ICC_LIB_PATH) \
|
||||
#-lgomp
|
||||
|
||||
# ACML
|
||||
ACML_LIB := -L$(ACML_LIB_PATH) \
|
||||
-lgfortran -lm -lrt -ldl -lacml
|
||||
ACMLP_LIB := -L$(ACMLP_LIB_PATH) \
|
||||
-lgfortran -lm -lrt -ldl -lacml_mp
|
||||
VENDOR_LIB := $(MKL_LIB)
|
||||
VENDORP_LIB := $(MKLP_LIB)
|
||||
|
||||
|
||||
#
|
||||
# --- Problem size definitions -------------------------------------------------
|
||||
#
|
||||
|
||||
# Single core (single-threaded)
|
||||
PS_BEGIN := 40
|
||||
PS_MAX := 2000
|
||||
PS_INC := 40
|
||||
|
||||
# Single-socket (multithreaded)
|
||||
P1_BEGIN := 120
|
||||
P1_MAX := 6000
|
||||
P1_INC := 120
|
||||
|
||||
# Dual-socket (multithreaded)
|
||||
P2_BEGIN := 160
|
||||
P2_MAX := 8000
|
||||
P2_INC := 160
|
||||
|
||||
|
||||
#
|
||||
# --- General build definitions ------------------------------------------------
|
||||
@@ -165,12 +177,6 @@ CFLAGS += -I$(TEST_SRC_PATH)
|
||||
LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L)
|
||||
|
||||
|
||||
# Datatype
|
||||
DT_S := -DDT=BLIS_FLOAT
|
||||
DT_D := -DDT=BLIS_DOUBLE
|
||||
DT_C := -DDT=BLIS_SCOMPLEX
|
||||
DT_Z := -DDT=BLIS_DCOMPLEX
|
||||
|
||||
# Which library?
|
||||
BLI_DEF := -DBLIS
|
||||
BLA_DEF := -DBLAS
|
||||
@@ -185,29 +191,25 @@ D1M := -DIND=BLIS_1M
|
||||
DNAT := -DIND=BLIS_NAT
|
||||
|
||||
# Implementation string
|
||||
STR_3MHW := -DSTR=\"3mhw\"
|
||||
STR_3M1 := -DSTR=\"3m1\"
|
||||
STR_4MHW := -DSTR=\"4mhw\"
|
||||
STR_4M1B := -DSTR=\"4m1b\"
|
||||
STR_4M1A := -DSTR=\"4m1a\"
|
||||
STR_1M := -DSTR=\"1m\"
|
||||
STR_NAT := -DSTR=\"asm\"
|
||||
#STR_3MHW := -DSTR=\"3mhw\"
|
||||
#STR_3M1 := -DSTR=\"3m1\"
|
||||
#STR_4MHW := -DSTR=\"4mhw\"
|
||||
#STR_4M1B := -DSTR=\"4m1b\"
|
||||
#STR_4M1A := -DSTR=\"4m1a\"
|
||||
#STR_1M := -DSTR=\"1m\"
|
||||
STR_NAT := -DSTR=\"asm_blis\"
|
||||
STR_OBL := -DSTR=\"openblas\"
|
||||
STR_MKL := -DSTR=\"mkl\"
|
||||
STR_ACML := -DSTR=\"acml\"
|
||||
STR_VEN := -DSTR=\"vendor\"
|
||||
|
||||
# Single or multithreaded string
|
||||
STR_ST := -DTHR_STR=\"st\"
|
||||
STR_MT := -DTHR_STR=\"mt\"
|
||||
STR_1S := -DTHR_STR=\"1s\"
|
||||
STR_2S := -DTHR_STR=\"2s\"
|
||||
|
||||
# Problem size specification
|
||||
PDEF_ST := -DP_BEGIN=56 \
|
||||
-DP_END=2800 \
|
||||
-DP_INC=56
|
||||
|
||||
PDEF_MT := -DP_BEGIN=160 \
|
||||
-DP_END=8000 \
|
||||
-DP_INC=160
|
||||
PDEF_ST := -DP_BEGIN=$(PS_BEGIN) -DP_INC=$(PS_INC) -DP_MAX=$(PS_MAX)
|
||||
PDEF_1S := -DP_BEGIN=$(P1_BEGIN) -DP_INC=$(P1_INC) -DP_MAX=$(P1_MAX)
|
||||
PDEF_2S := -DP_BEGIN=$(P2_BEGIN) -DP_INC=$(P2_INC) -DP_MAX=$(P2_MAX)
|
||||
|
||||
|
||||
|
||||
@@ -215,340 +217,129 @@ PDEF_MT := -DP_BEGIN=160 \
|
||||
# --- Targets/rules ------------------------------------------------------------
|
||||
#
|
||||
|
||||
all: all-st all-mt
|
||||
blis: blis-st blis-mt
|
||||
openblas: openblas-st openblas-mt
|
||||
mkl: mkl-st mkl-mt
|
||||
all: all-st all-1s all-2s
|
||||
blis: blis-st blis-1s blis-2s
|
||||
openblas: openblas-st openblas-1s openblas-2s
|
||||
vendor: vendor-st vendor-1s vendor-2s
|
||||
mkl: vendor
|
||||
armpl: vendor
|
||||
|
||||
all-st: blis-st openblas-st mkl-st
|
||||
all-mt: blis-mt openblas-mt mkl-mt
|
||||
all-1s: blis-1s openblas-1s mkl-1s
|
||||
all-2s: blis-2s openblas-2s mkl-2s
|
||||
|
||||
blis-st: blis-nat-st
|
||||
blis-mt: blis-nat-mt
|
||||
blis-1s: blis-nat-1s
|
||||
blis-2s: blis-nat-2s
|
||||
|
||||
blis-ind: blis-ind-st blis-ind-mt
|
||||
blis-nat: blis-nat-st blis-nat-mt
|
||||
#blis-ind: blis-ind-st blis-ind-mt
|
||||
blis-nat: blis-nat-st blis-nat-1s blis-nat-2s
|
||||
|
||||
blis-ind-st: \
|
||||
test_cgemm_3mhw_blis_st.x \
|
||||
test_zgemm_3mhw_blis_st.x \
|
||||
test_cgemm_3m1_blis_st.x \
|
||||
test_zgemm_3m1_blis_st.x \
|
||||
test_cgemm_4mhw_blis_st.x \
|
||||
test_zgemm_4mhw_blis_st.x \
|
||||
test_cgemm_4m1b_blis_st.x \
|
||||
test_zgemm_4m1b_blis_st.x \
|
||||
test_cgemm_4m1a_blis_st.x \
|
||||
test_zgemm_4m1a_blis_st.x \
|
||||
test_cgemm_1m_blis_st.x \
|
||||
test_zgemm_1m_blis_st.x
|
||||
# Define the datatypes, operations, and implementations.
|
||||
DTS := s d c z
|
||||
OPS := gemm hemm herk trmm trsm
|
||||
IMPLS := asm_blis openblas vendor
|
||||
|
||||
blis-ind-mt: \
|
||||
test_cgemm_3mhw_blis_mt.x \
|
||||
test_zgemm_3mhw_blis_mt.x \
|
||||
test_cgemm_3m1_blis_mt.x \
|
||||
test_zgemm_3m1_blis_mt.x \
|
||||
test_cgemm_4mhw_blis_mt.x \
|
||||
test_zgemm_4mhw_blis_mt.x \
|
||||
test_cgemm_4m1b_blis_mt.x \
|
||||
test_zgemm_4m1b_blis_mt.x \
|
||||
test_cgemm_4m1a_blis_mt.x \
|
||||
test_zgemm_4m1a_blis_mt.x \
|
||||
test_cgemm_1m_blis_mt.x \
|
||||
test_zgemm_1m_blis_mt.x
|
||||
# Define functions to construct object filenames from the datatypes and
|
||||
# operations given an implementation. We define one function each for single-
|
||||
# threaded, single-socket, and dual-socket filenames.
|
||||
get-st-objs = $(foreach dt,$(DTS),$(foreach op,$(OPS),test_$(dt)$(op)_$(PS_MAX)_$(1)_st.o))
|
||||
get-1s-objs = $(foreach dt,$(DTS),$(foreach op,$(OPS),test_$(dt)$(op)_$(P1_MAX)_$(1)_1s.o))
|
||||
get-2s-objs = $(foreach dt,$(DTS),$(foreach op,$(OPS),test_$(dt)$(op)_$(P2_MAX)_$(1)_2s.o))
|
||||
|
||||
blis-nat-st: \
|
||||
test_sgemm_asm_blis_st.x \
|
||||
test_dgemm_asm_blis_st.x \
|
||||
test_cgemm_asm_blis_st.x \
|
||||
test_zgemm_asm_blis_st.x \
|
||||
test_shemm_asm_blis_st.x \
|
||||
test_dhemm_asm_blis_st.x \
|
||||
test_chemm_asm_blis_st.x \
|
||||
test_zhemm_asm_blis_st.x \
|
||||
test_sherk_asm_blis_st.x \
|
||||
test_dherk_asm_blis_st.x \
|
||||
test_cherk_asm_blis_st.x \
|
||||
test_zherk_asm_blis_st.x \
|
||||
test_strmm_asm_blis_st.x \
|
||||
test_dtrmm_asm_blis_st.x \
|
||||
test_ctrmm_asm_blis_st.x \
|
||||
test_ztrmm_asm_blis_st.x \
|
||||
test_strsm_asm_blis_st.x \
|
||||
test_dtrsm_asm_blis_st.x \
|
||||
test_ctrsm_asm_blis_st.x \
|
||||
test_ztrsm_asm_blis_st.x
|
||||
# Construct object and binary names for single-threaded, single-socket, and
|
||||
# dual-socket files for BLIS, OpenBLAS, and a vendor library (e.g. MKL).
|
||||
BLIS_NAT_ST_OBJS := $(call get-st-objs,asm_blis)
|
||||
BLIS_NAT_ST_BINS := $(patsubst %.o,%.x,$(BLIS_NAT_ST_OBJS))
|
||||
BLIS_NAT_1S_OBJS := $(call get-1s-objs,asm_blis)
|
||||
BLIS_NAT_1S_BINS := $(patsubst %.o,%.x,$(BLIS_NAT_1S_OBJS))
|
||||
BLIS_NAT_2S_OBJS := $(call get-2s-objs,asm_blis)
|
||||
BLIS_NAT_2S_BINS := $(patsubst %.o,%.x,$(BLIS_NAT_2S_OBJS))
|
||||
|
||||
blis-nat-mt: \
|
||||
test_sgemm_asm_blis_mt.x \
|
||||
test_dgemm_asm_blis_mt.x \
|
||||
test_cgemm_asm_blis_mt.x \
|
||||
test_zgemm_asm_blis_mt.x \
|
||||
test_shemm_asm_blis_mt.x \
|
||||
test_dhemm_asm_blis_mt.x \
|
||||
test_chemm_asm_blis_mt.x \
|
||||
test_zhemm_asm_blis_mt.x \
|
||||
test_sherk_asm_blis_mt.x \
|
||||
test_dherk_asm_blis_mt.x \
|
||||
test_cherk_asm_blis_mt.x \
|
||||
test_zherk_asm_blis_mt.x \
|
||||
test_strmm_asm_blis_mt.x \
|
||||
test_dtrmm_asm_blis_mt.x \
|
||||
test_ctrmm_asm_blis_mt.x \
|
||||
test_ztrmm_asm_blis_mt.x \
|
||||
test_strsm_asm_blis_mt.x \
|
||||
test_dtrsm_asm_blis_mt.x \
|
||||
test_ctrsm_asm_blis_mt.x \
|
||||
test_ztrsm_asm_blis_mt.x
|
||||
OPENBLAS_ST_OBJS := $(call get-st-objs,openblas)
|
||||
OPENBLAS_ST_BINS := $(patsubst %.o,%.x,$(OPENBLAS_ST_OBJS))
|
||||
OPENBLAS_1S_OBJS := $(call get-1s-objs,openblas)
|
||||
OPENBLAS_1S_BINS := $(patsubst %.o,%.x,$(OPENBLAS_1S_OBJS))
|
||||
OPENBLAS_2S_OBJS := $(call get-2s-objs,openblas)
|
||||
OPENBLAS_2S_BINS := $(patsubst %.o,%.x,$(OPENBLAS_2S_OBJS))
|
||||
|
||||
openblas-st: \
|
||||
test_sgemm_openblas_st.x \
|
||||
test_dgemm_openblas_st.x \
|
||||
test_cgemm_openblas_st.x \
|
||||
test_zgemm_openblas_st.x \
|
||||
test_shemm_openblas_st.x \
|
||||
test_dhemm_openblas_st.x \
|
||||
test_chemm_openblas_st.x \
|
||||
test_zhemm_openblas_st.x \
|
||||
test_sherk_openblas_st.x \
|
||||
test_dherk_openblas_st.x \
|
||||
test_cherk_openblas_st.x \
|
||||
test_zherk_openblas_st.x \
|
||||
test_strmm_openblas_st.x \
|
||||
test_dtrmm_openblas_st.x \
|
||||
test_ctrmm_openblas_st.x \
|
||||
test_ztrmm_openblas_st.x \
|
||||
test_strsm_openblas_st.x \
|
||||
test_dtrsm_openblas_st.x \
|
||||
test_ctrsm_openblas_st.x \
|
||||
test_ztrsm_openblas_st.x
|
||||
VENDOR_ST_OBJS := $(call get-st-objs,vendor)
|
||||
VENDOR_ST_BINS := $(patsubst %.o,%.x,$(VENDOR_ST_OBJS))
|
||||
VENDOR_1S_OBJS := $(call get-1s-objs,vendor)
|
||||
VENDOR_1S_BINS := $(patsubst %.o,%.x,$(VENDOR_1S_OBJS))
|
||||
VENDOR_2S_OBJS := $(call get-2s-objs,vendor)
|
||||
VENDOR_2S_BINS := $(patsubst %.o,%.x,$(VENDOR_2S_OBJS))
|
||||
|
||||
openblas-mt: \
|
||||
test_sgemm_openblas_mt.x \
|
||||
test_dgemm_openblas_mt.x \
|
||||
test_cgemm_openblas_mt.x \
|
||||
test_zgemm_openblas_mt.x \
|
||||
test_shemm_openblas_mt.x \
|
||||
test_dhemm_openblas_mt.x \
|
||||
test_chemm_openblas_mt.x \
|
||||
test_zhemm_openblas_mt.x \
|
||||
test_sherk_openblas_mt.x \
|
||||
test_dherk_openblas_mt.x \
|
||||
test_cherk_openblas_mt.x \
|
||||
test_zherk_openblas_mt.x \
|
||||
test_strmm_openblas_mt.x \
|
||||
test_dtrmm_openblas_mt.x \
|
||||
test_ctrmm_openblas_mt.x \
|
||||
test_ztrmm_openblas_mt.x \
|
||||
test_strsm_openblas_mt.x \
|
||||
test_dtrsm_openblas_mt.x \
|
||||
test_ctrsm_openblas_mt.x \
|
||||
test_ztrsm_openblas_mt.x
|
||||
# Define some targets associated with the above object/binary files.
|
||||
blis-nat-st: $(BLIS_NAT_ST_BINS)
|
||||
blis-nat-1s: $(BLIS_NAT_1S_BINS)
|
||||
blis-nat-2s: $(BLIS_NAT_2S_BINS)
|
||||
|
||||
mkl-st: \
|
||||
test_sgemm_mkl_st.x \
|
||||
test_dgemm_mkl_st.x \
|
||||
test_cgemm_mkl_st.x \
|
||||
test_zgemm_mkl_st.x \
|
||||
test_shemm_mkl_st.x \
|
||||
test_dhemm_mkl_st.x \
|
||||
test_chemm_mkl_st.x \
|
||||
test_zhemm_mkl_st.x \
|
||||
test_sherk_mkl_st.x \
|
||||
test_dherk_mkl_st.x \
|
||||
test_cherk_mkl_st.x \
|
||||
test_zherk_mkl_st.x \
|
||||
test_strmm_mkl_st.x \
|
||||
test_dtrmm_mkl_st.x \
|
||||
test_ctrmm_mkl_st.x \
|
||||
test_ztrmm_mkl_st.x \
|
||||
test_strsm_mkl_st.x \
|
||||
test_dtrsm_mkl_st.x \
|
||||
test_ctrsm_mkl_st.x \
|
||||
test_ztrsm_mkl_st.x
|
||||
openblas-st: $(OPENBLAS_ST_BINS)
|
||||
openblas-1s: $(OPENBLAS_1S_BINS)
|
||||
openblas-2s: $(OPENBLAS_2S_BINS)
|
||||
|
||||
mkl-mt: \
|
||||
test_sgemm_mkl_mt.x \
|
||||
test_dgemm_mkl_mt.x \
|
||||
test_cgemm_mkl_mt.x \
|
||||
test_zgemm_mkl_mt.x \
|
||||
test_shemm_mkl_mt.x \
|
||||
test_dhemm_mkl_mt.x \
|
||||
test_chemm_mkl_mt.x \
|
||||
test_zhemm_mkl_mt.x \
|
||||
test_sherk_mkl_mt.x \
|
||||
test_dherk_mkl_mt.x \
|
||||
test_cherk_mkl_mt.x \
|
||||
test_zherk_mkl_mt.x \
|
||||
test_strmm_mkl_mt.x \
|
||||
test_dtrmm_mkl_mt.x \
|
||||
test_ctrmm_mkl_mt.x \
|
||||
test_ztrmm_mkl_mt.x \
|
||||
test_strsm_mkl_mt.x \
|
||||
test_dtrsm_mkl_mt.x \
|
||||
test_ctrsm_mkl_mt.x \
|
||||
test_ztrsm_mkl_mt.x
|
||||
vendor-st: $(VENDOR_ST_BINS)
|
||||
vendor-1s: $(VENDOR_1S_BINS)
|
||||
vendor-2s: $(VENDOR_2S_BINS)
|
||||
|
||||
mkl-st: vendor-st
|
||||
mkl-1s: vendor-1s
|
||||
mkl-2s: vendor-2s
|
||||
|
||||
armpl-st: vendor-st
|
||||
armpl-1s: vendor-1s
|
||||
armpl-2s: vendor-2s
|
||||
|
||||
# Mark the object files as intermediate so that make will remove them
|
||||
# automatically after building the binaries on which they depend.
|
||||
.INTERMEDIATE: $(BLIS_NAT_ST_OBJS) $(OPENBLAS_ST_OBJS) $(VENDOR_ST_OBJS)
|
||||
.INTERMEDIATE: $(BLIS_NAT_1S_OBJS) $(OPENBLAS_1S_OBJS) $(VENDOR_1S_OBJS)
|
||||
.INTERMEDIATE: $(BLIS_NAT_2S_OBJS) $(OPENBLAS_2S_OBJS) $(VENDOR_2S_OBJS)
|
||||
|
||||
|
||||
# -- Object file rules --
|
||||
|
||||
$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
#$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c
|
||||
# $(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
# blis 3mhw
|
||||
test_z%_3mhw_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_ST) -c $< -o $@
|
||||
# A function to return the datatype cpp macro def from the datatype
|
||||
# character.
|
||||
get-dt-cpp = -DDT=bli_$(1)type
|
||||
|
||||
test_c%_3mhw_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_ST) -c $< -o $@
|
||||
# A function to return other cpp macros that help the test driver
|
||||
# identify the implementation.
|
||||
get-bl-cpp = $(strip \
|
||||
$(if $(findstring blis,$(1)),$(STR_NAT) $(BLI_DEF),\
|
||||
$(if $(findstring openblas,$(1)),$(STR_OBL) $(BLA_DEF),\
|
||||
$(STR_VEN) $(BLA_DEF))))
|
||||
|
||||
test_z%_3mhw_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_MT) -c $< -o $@
|
||||
define make-st-rule
|
||||
test_$(1)$(2)_$(PS_MAX)_$(3)_st.o: test_$(op).c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3)) $(DNAT) $(STR_ST) -c $$< -o $$@
|
||||
endef
|
||||
|
||||
test_c%_3mhw_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_MT) -c $< -o $@
|
||||
define make-1s-rule
|
||||
test_$(1)$(2)_$(P1_MAX)_$(3)_1s.o: test_$(op).c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_1S) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3)) $(DNAT) $(STR_1S) -c $$< -o $$@
|
||||
endef
|
||||
|
||||
# blis 3m1
|
||||
test_z%_3m1_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@
|
||||
define make-2s-rule
|
||||
test_$(1)$(2)_$(P2_MAX)_$(3)_2s.o: test_$(op).c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_2S) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3)) $(DNAT) $(STR_2S) -c $$< -o $$@
|
||||
endef
|
||||
|
||||
test_c%_3m1_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@
|
||||
$(foreach dt,$(DTS), \
|
||||
$(foreach op,$(OPS), \
|
||||
$(foreach im,$(IMPLS),$(eval $(call make-st-rule,$(dt),$(op),$(im))))))
|
||||
|
||||
test_z%_3m1_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o $@
|
||||
$(foreach dt,$(DTS), \
|
||||
$(foreach op,$(OPS), \
|
||||
$(foreach im,$(IMPLS),$(eval $(call make-1s-rule,$(dt),$(op),$(im))))))
|
||||
|
||||
test_c%_3m1_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o $@
|
||||
|
||||
# blis 4mhw
|
||||
test_z%_4mhw_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_c%_4mhw_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_z%_4mhw_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_c%_4mhw_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@
|
||||
|
||||
# blis 4m1b
|
||||
test_z%_4m1b_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_c%_4m1b_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_z%_4m1b_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_c%_4m1b_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_MT) -c $< -o $@
|
||||
|
||||
# blis 4m1a
|
||||
test_z%_4m1a_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_c%_4m1a_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_z%_4m1a_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_c%_4m1a_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_MT) -c $< -o $@
|
||||
|
||||
# blis 1m
|
||||
test_z%_1m_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_c%_1m_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_z%_1m_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_c%_1m_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_MT) -c $< -o $@
|
||||
|
||||
# blis asm
|
||||
test_d%_asm_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_s%_asm_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_z%_asm_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_c%_asm_blis_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_d%_asm_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_s%_asm_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_z%_asm_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_c%_asm_blis_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@
|
||||
|
||||
# openblas
|
||||
test_d%_openblas_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_s%_openblas_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_z%_openblas_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_c%_openblas_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_d%_openblas_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_s%_openblas_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_z%_openblas_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_c%_openblas_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
|
||||
|
||||
# mkl
|
||||
test_d%_mkl_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_s%_mkl_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_z%_mkl_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_c%_mkl_st.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_d%_mkl_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_s%_mkl_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_z%_mkl_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_c%_mkl_mt.o: test_%.c Makefile
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
|
||||
$(foreach dt,$(DTS), \
|
||||
$(foreach op,$(OPS), \
|
||||
$(foreach im,$(IMPLS),$(eval $(call make-2s-rule,$(dt),$(op),$(im))))))
|
||||
|
||||
|
||||
# -- Executable file rules --
|
||||
@@ -558,23 +349,34 @@ test_c%_mkl_mt.o: test_%.c Makefile
|
||||
# compatibility layer. This prevents BLIS from inadvertently getting called
|
||||
# for the BLAS routines we are trying to test with.
|
||||
|
||||
test_%_openblas_st.x: test_%_openblas_st.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
|
||||
test_%_$(PS_MAX)_openblas_st.x: test_%_$(PS_MAX)_openblas_st.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $(strip $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_openblas_mt.x: test_%_openblas_mt.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
|
||||
test_%_$(P1_MAX)_openblas_1s.x: test_%_$(P1_MAX)_openblas_1s.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $(strip $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_mkl_st.x: test_%_mkl_st.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $< $(MKL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
|
||||
test_%_$(P2_MAX)_openblas_2s.x: test_%_$(P2_MAX)_openblas_2s.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $(strip $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_mkl_mt.x: test_%_mkl_mt.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $< $(MKLP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
|
||||
|
||||
test_%_blis_st.x: test_%_blis_st.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
|
||||
test_%_$(PS_MAX)_vendor_st.x: test_%_$(PS_MAX)_vendor_st.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $(strip $< $(VENDOR_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_blis_mt.x: test_%_blis_mt.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
|
||||
test_%_$(P1_MAX)_vendor_1s.x: test_%_$(P1_MAX)_vendor_1s.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $(strip $< $(VENDORP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_$(P2_MAX)_vendor_2s.x: test_%_$(P2_MAX)_vendor_2s.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $(strip $< $(VENDORP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
|
||||
test_%_$(PS_MAX)_asm_blis_st.x: test_%_$(PS_MAX)_asm_blis_st.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_$(P1_MAX)_asm_blis_1s.x: test_%_$(P1_MAX)_asm_blis_1s.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_$(P2_MAX)_asm_blis_2s.x: test_%_$(P2_MAX)_asm_blis_2s.o $(LIBBLIS_LINK)
|
||||
$(LINKER) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
|
||||
# -- Clean rules --
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
% tx2
|
||||
plot_panel_4x5(2.2,8,1, '../results/tx2/st', 'tx2', 'ARMPL'); close; clear all;
|
||||
plot_panel_4x5(2.2,8,28,'../results/tx2/jc4ic7','tx2_jc4ic7','ARMPL'); close; clear all;
|
||||
plot_panel_4x5(2.2,8,56,'../results/tx2/jc8ic7','tx2_jc8ic7','ARMPL'); close; clear all;
|
||||
|
||||
% skx
|
||||
plot_panel_4x5(2.0,32,1,'../results/skx/st/20190218','skx','MKL'); close; clear all;
|
||||
plot_panel_4x5(2.0,32,26,'../results/skx/jc2ic13/20190218','skx_jc2ic13','MKL'); close; clear all;
|
||||
plot_panel_4x5(2.0,32,52,'../results/skx/jc4ic13/20190218','skx_jc4ic13','MKL'); close; clear all;
|
||||
@@ -99,7 +99,7 @@ vend_ln = line( x_axis( :, 1 ), data_vend( :, flopscol ) / nth, ...
|
||||
xlim( ax1, [x_begin x_end] );
|
||||
ylim( ax1, [y_begin y_end] );
|
||||
|
||||
if x_end == 10000 || x_end == 8000
|
||||
if x_end == 10000 || x_end == 8000 || x_end == 6000
|
||||
x_tick2 = x_end - 2000;
|
||||
x_tick1 = x_tick2/2;
|
||||
xticks( ax1, [ x_tick1 x_tick2 ] );
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
function r_val = plot_panel_4x5( cfreq, ...
|
||||
dflopspercycle, ...
|
||||
nth, ...
|
||||
thr_str, ...
|
||||
dirpath, ...
|
||||
arch_str, ...
|
||||
vend_str )
|
||||
@@ -12,18 +13,12 @@ function r_val = plot_panel_4x5( cfreq, ...
|
||||
% results.
|
||||
filetemp_blis = '%s/output_%s_%s_asm_blis.m';
|
||||
filetemp_open = '%s/output_%s_%s_openblas.m';
|
||||
filetemp_mkl = '%s/output_%s_%s_mkl.m';
|
||||
filetemp_vend = '%s/output_%s_%s_vendor.m';
|
||||
|
||||
% Create a variable name "template" for the variables contained in the
|
||||
% files outlined above.
|
||||
vartemp = 'data_%s_%s_%s( :, : )';
|
||||
|
||||
if nth == 1
|
||||
thr_str = 'st';
|
||||
else
|
||||
thr_str = 'mt';
|
||||
end
|
||||
|
||||
% Define the datatypes and operations we will be plotting.
|
||||
dts = [ 's' 'd' 'c' 'z' ];
|
||||
ops( 1, : ) = 'gemm';
|
||||
@@ -63,26 +58,26 @@ for opi = 1:n_opnames
|
||||
% Construct filenames for the data files from templates.
|
||||
file_blis = sprintf( filetemp_blis, dirpath, thr_str, opname );
|
||||
file_open = sprintf( filetemp_open, dirpath, thr_str, opname );
|
||||
file_mkl = sprintf( filetemp_mkl, dirpath, thr_str, opname );
|
||||
file_vend = sprintf( filetemp_vend, dirpath, thr_str, opname );
|
||||
|
||||
% Load the data files.
|
||||
%str = sprintf( ' Loading %s', file_blis ); disp(str);
|
||||
run( file_blis )
|
||||
%str = sprintf( ' Loading %s', file_open ); disp(str);
|
||||
run( file_open )
|
||||
%str = sprintf( ' Loading %s', file_mkl ); disp(str);
|
||||
run( file_mkl )
|
||||
%str = sprintf( ' Loading %s', file_vend ); disp(str);
|
||||
run( file_vend )
|
||||
|
||||
% Construct variable names for the variables in the data files.
|
||||
var_blis = sprintf( vartemp, thr_str, opname, 'asm_blis' );
|
||||
var_open = sprintf( vartemp, thr_str, opname, 'openblas' );
|
||||
var_vend = sprintf( vartemp, thr_str, opname, 'mkl' );
|
||||
var_vend = sprintf( vartemp, thr_str, opname, 'vendor' );
|
||||
|
||||
% Use eval() to instantiate the variable names constructed above,
|
||||
% copying each to a simplified name.
|
||||
data_blis = eval( var_blis ); % e.g. data_st_sgemm_asm_blis( :, : );
|
||||
data_open = eval( var_open ); % e.g. data_st_sgemm_openblas( :, : );
|
||||
data_vend = eval( var_vend ); % e.g. data_st_sgemm_mkl( :, : );
|
||||
data_vend = eval( var_vend ); % e.g. data_st_sgemm_vendor( :, : );
|
||||
|
||||
% Plot one result in an m x n grid of plots, via the subplot()
|
||||
% function.
|
||||
|
||||
9
test/3m4m/matlab/runme.m
Normal file
9
test/3m4m/matlab/runme.m
Normal file
@@ -0,0 +1,9 @@
|
||||
% tx2
|
||||
plot_panel_4x5(2.2,8,1, 'st','../results/tx2/st', 'tx2', 'ARMPL'); close; clear all;
|
||||
plot_panel_4x5(2.2,8,28,'1s','../results/tx2/jc4ic7','tx2_jc4ic7','ARMPL'); close; clear all;
|
||||
plot_panel_4x5(2.2,8,56,'2s','../results/tx2/jc8ic7','tx2_jc8ic7','ARMPL'); close; clear all;
|
||||
|
||||
% skx
|
||||
plot_panel_4x5(2.0,32,1, 'st','../results/skx/st/20190218', 'skx', 'MKL'); close; clear all;
|
||||
plot_panel_4x5(2.0,32,26,'1s','../results/skx/jc2ic13/20190218','skx_jc2ic13','MKL'); close; clear all;
|
||||
plot_panel_4x5(2.0,32,52,'2s','../results/skx/jc4ic13/20190218','skx_jc4ic13','MKL'); close; clear all;
|
||||
@@ -3,37 +3,33 @@
|
||||
# File prefixes.
|
||||
exec_root="test"
|
||||
out_root="output"
|
||||
delay=0.1
|
||||
|
||||
#sys="blis"
|
||||
#sys="stampede2"
|
||||
sys="lonestar5"
|
||||
#sys="lonestar5"
|
||||
#sys="ul252"
|
||||
sys="ul264"
|
||||
|
||||
# Bind threads to processors.
|
||||
#export OMP_PROC_BIND=true
|
||||
#export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 16 18 20 22 1 3 5 7 9 11 13 15 17 19 21 23"
|
||||
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103"
|
||||
|
||||
# Modify LD_LIBRARY_PATH.
|
||||
if [ ${sys} = "blis" ]; then
|
||||
|
||||
export GOMP_CPU_AFFINITY="0 1 2 3"
|
||||
|
||||
jc_nt=1 # 5th loop
|
||||
ic_nt=4 # 3rd loop
|
||||
jr_nt=1 # 2nd loop
|
||||
ir_nt=1 # 1st loop
|
||||
nt=4
|
||||
threads="jc2ic2jr1_4000
|
||||
jc2ic2jr1_6000"
|
||||
|
||||
elif [ ${sys} = "stampede2" ]; then
|
||||
|
||||
echo "Need to set GOMP_CPU_AFFINITY."
|
||||
exit 1
|
||||
|
||||
jc_nt=4 # 5th loop
|
||||
ic_nt=12 # 3rd loop
|
||||
jr_nt=1 # 2nd loop
|
||||
ir_nt=1 # 1st loop
|
||||
nt=48
|
||||
threads="jc4ic6jr1_6000
|
||||
jc4ic12jr1_8000"
|
||||
|
||||
elif [ ${sys} = "lonestar5" ]; then
|
||||
|
||||
@@ -42,148 +38,115 @@ elif [ ${sys} = "lonestar5" ]; then
|
||||
# A hack to use libiomp5 with gcc.
|
||||
#export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/16.0.1.150/compilers_and_libraries_2016.1.150/linux/compiler/lib/intel64"
|
||||
|
||||
# runner-up:
|
||||
#jc_nt=6 # 5th loop
|
||||
#ic_nt=4 # 3rd loop
|
||||
#jr_nt=1 # 2nd loop
|
||||
|
||||
jc_nt=2 # 5th loop
|
||||
ic_nt=3 # 3rd loop
|
||||
jr_nt=2 # 2nd loop
|
||||
|
||||
ir_nt=1 # 1st loop
|
||||
nt=12
|
||||
threads="jc2ic3jr2_6000
|
||||
jc4ic3jr2_8000"
|
||||
|
||||
elif [ ${sys} = "ul252" ]; then
|
||||
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
|
||||
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103"
|
||||
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51"
|
||||
|
||||
#jc_nt=4 # 5th loop
|
||||
jc_nt=2 # 5th loop
|
||||
ic_nt=13 # 3rd loop
|
||||
jr_nt=1 # 2nd loop
|
||||
ir_nt=1 # 1st loop
|
||||
#nt=52
|
||||
nt=26
|
||||
threads="jc2ic13jr1_6000
|
||||
jc4ic13jr1_8000"
|
||||
|
||||
elif [ ${sys} = "ul264" ]; then
|
||||
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
|
||||
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63"
|
||||
|
||||
threads="jc1ic8jr4_6000
|
||||
jc2ic8jr4_8000"
|
||||
|
||||
fi
|
||||
|
||||
echo "Setting BLIS threading params for ${sys}: jc${jc_nt}ic${ic_nt}jr${jr_nt}."
|
||||
# Datatypes to test.
|
||||
test_dts="d s z c"
|
||||
|
||||
# Operations to test.
|
||||
test_ops="gemm hemm herk trmm trsm"
|
||||
test_ops="gemm"
|
||||
|
||||
# Implementations to test.
|
||||
impls="all"
|
||||
#impls="other"
|
||||
#impls="blis"
|
||||
|
||||
if [ "${impls}" = "blis" ]; then
|
||||
|
||||
test_impls="asm_blis"
|
||||
|
||||
elif [ "${impls}" = "other" ]; then
|
||||
|
||||
test_impls="openblas vendor"
|
||||
|
||||
else
|
||||
|
||||
test_impls="openblas asm_blis vendor"
|
||||
fi
|
||||
|
||||
# Save a copy of GOMP_CPU_AFFINITY so that if we have to unset it, we can
|
||||
# restore the value.
|
||||
GOMP_CPU_AFFINITYsave=${GOMP_CPU_AFFINITY}
|
||||
|
||||
# Threadedness to test.
|
||||
threads="mt"
|
||||
threads_r="mt"
|
||||
#threads="st"
|
||||
#threads_r="st"
|
||||
|
||||
# Datatypes to test.
|
||||
dts=""
|
||||
dts_r=""
|
||||
dts="z c"
|
||||
dts_r="d s"
|
||||
|
||||
# Operations to test.
|
||||
l3_ops="gemm hemm herk trmm trsm"
|
||||
test_ops="${l3_ops}"
|
||||
test_ops_r="${l3_ops}"
|
||||
|
||||
# Complex domain implementations to test.
|
||||
#test_impls="3mhw_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis 1m_blis"
|
||||
#test_impls="openblas mkl asm_blis"
|
||||
|
||||
# Implementations to test.
|
||||
impls="allasm"
|
||||
|
||||
if [ ${impls} = "allasm" ]; then
|
||||
|
||||
test_impls_r="openblas asm_blis mkl"
|
||||
test_impls="openblas asm_blis mkl"
|
||||
|
||||
elif [ ${impls} = "comp" ]; then
|
||||
|
||||
test_impls_r="openblas mkl"
|
||||
test_impls="openblas mkl"
|
||||
|
||||
elif [ ${impls} = "blis" ]; then
|
||||
|
||||
test_impls_r="asm_blis"
|
||||
test_impls="asm_blis"
|
||||
fi
|
||||
|
||||
# First perform real test cases.
|
||||
for th in ${threads_r}; do
|
||||
|
||||
for dt in ${dts_r}; do
|
||||
|
||||
for im in ${test_impls_r}; do
|
||||
|
||||
for op in ${test_ops_r}; do
|
||||
|
||||
# Set the number of threads according to th.
|
||||
if [ ${th} = "mt" ]; then
|
||||
|
||||
export BLIS_JC_NT=${jc_nt}
|
||||
export BLIS_IC_NT=${ic_nt}
|
||||
export BLIS_JR_NT=${jr_nt}
|
||||
export BLIS_IR_NT=${ir_nt}
|
||||
export OPENBLAS_NUM_THREADS=${nt}
|
||||
export MKL_NUM_THREADS=${nt}
|
||||
export nt_use=${nt}
|
||||
|
||||
# Unset GOMP_CPU_AFFINITY for OpenBLAS.
|
||||
if [ ${im} = "openblas" ]; then
|
||||
|
||||
unset GOMP_CPU_AFFINITY
|
||||
else
|
||||
export GOMP_CPU_AFFINITY="${GOMP_CPU_AFFINITYsave}"
|
||||
fi
|
||||
else
|
||||
|
||||
export BLIS_JC_NT=1
|
||||
export BLIS_IC_NT=1
|
||||
export BLIS_JR_NT=1
|
||||
export BLIS_IR_NT=1
|
||||
export OPENBLAS_NUM_THREADS=1
|
||||
export MKL_NUM_THREADS=1
|
||||
export nt_use=1
|
||||
fi
|
||||
|
||||
# Construct the name of the test executable.
|
||||
exec_name="${exec_root}_${dt}${op}_${im}_${th}.x"
|
||||
|
||||
# Construct the name of the output file.
|
||||
out_file="${out_root}_${th}_${dt}${op}_${im}.m"
|
||||
|
||||
echo "Running (nt = ${nt_use}) ./${exec_name} > ${out_file}"
|
||||
|
||||
# Run executable.
|
||||
./${exec_name} > ${out_file}
|
||||
|
||||
sleep 1
|
||||
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
|
||||
# Now perform complex test cases.
|
||||
for th in ${threads}; do
|
||||
|
||||
for dt in ${dts}; do
|
||||
# Start with one way of parallelism in each loop. We will now begin
|
||||
# parsing the 'th' variable to update one or more of these threading
|
||||
# parameters.
|
||||
jc_nt=1; pc_nt=1; ic_nt=1; jr_nt=1; ir_nt=1
|
||||
|
||||
# Strip everything before and after the underscore so that what remains
|
||||
# is the problem size and threading parameter string, respectively.
|
||||
psize=${th##*_}; thinfo=${th%%_*}
|
||||
|
||||
# Identify each threading parameter and insert a space before it.
|
||||
thsep=$(echo -e ${thinfo} | sed -e "s/\([jip][cr]\)/ \1/g" )
|
||||
|
||||
nt=1
|
||||
|
||||
for loopnum in ${thsep}; do
|
||||
|
||||
# Given the current string, which identifies a loop and the
|
||||
# number of ways of parallelism for that loop, strip out
|
||||
# the ways and loop separately to identify each.
|
||||
loop=$(echo -e ${loopnum} | sed -e "s/[0-9]//g" )
|
||||
num=$(echo -e ${loopnum} | sed -e "s/[a-z]//g" )
|
||||
|
||||
# Construct a string that we can evaluate to set the number
|
||||
# of ways of parallelism for the current loop.
|
||||
loop_nt_eq_num="${loop}_nt=${num}"
|
||||
|
||||
# Update the total number of threads.
|
||||
nt=$(expr ${nt} \* ${num})
|
||||
|
||||
# Evaluate the string to assign the ways to the variable.
|
||||
eval ${loop_nt_eq_num}
|
||||
|
||||
done
|
||||
|
||||
echo "Switching to: jc${jc_nt} pc${pc_nt} ic${ic_nt} jr${jr_nt} ir${ir_nt} (nt = ${nt}) p_max${psize}"
|
||||
|
||||
|
||||
for dt in ${test_dts}; do
|
||||
|
||||
for im in ${test_impls}; do
|
||||
|
||||
for op in ${test_ops}; do
|
||||
|
||||
# Find the threading suffix by probing the executable.
|
||||
binname=$(ls ${exec_root}_${dt}${op}_${psize}_${im}_*.x)
|
||||
suf_ext=${binname##*_}
|
||||
suf=${suf_ext%%.*}
|
||||
|
||||
#echo "found file: ${binname} with suffix ${suf}"
|
||||
|
||||
# Set the number of threads according to th.
|
||||
if [ ${th} = "mt" ]; then
|
||||
if [ "${suf}" = "1s" ] || [ "${suf}" = "2s" ]; then
|
||||
|
||||
export BLIS_JC_NT=${jc_nt}
|
||||
export BLIS_PC_NT=${pc_nt}
|
||||
export BLIS_IC_NT=${ic_nt}
|
||||
export BLIS_JR_NT=${jr_nt}
|
||||
export BLIS_IR_NT=${ir_nt}
|
||||
@@ -191,9 +154,11 @@ for th in ${threads}; do
|
||||
export MKL_NUM_THREADS=${nt}
|
||||
export nt_use=${nt}
|
||||
|
||||
# Unset GOMP_CPU_AFFINITY for OpenBLAS.
|
||||
# Multithreaded OpenBLAS seems to have a problem running
|
||||
# properly if GOMP_CPU_AFFINITY is set. So we temporarily
|
||||
# unset it here if we are about to execute OpenBLAS, but
|
||||
# otherwise restore it.
|
||||
if [ ${im} = "openblas" ]; then
|
||||
|
||||
unset GOMP_CPU_AFFINITY
|
||||
else
|
||||
export GOMP_CPU_AFFINITY="${GOMP_CPU_AFFINITYsave}"
|
||||
@@ -201,6 +166,7 @@ for th in ${threads}; do
|
||||
else
|
||||
|
||||
export BLIS_JC_NT=1
|
||||
export BLIS_PC_NT=1
|
||||
export BLIS_IC_NT=1
|
||||
export BLIS_JR_NT=1
|
||||
export BLIS_IR_NT=1
|
||||
@@ -210,19 +176,21 @@ for th in ${threads}; do
|
||||
fi
|
||||
|
||||
# Construct the name of the test executable.
|
||||
exec_name="${exec_root}_${dt}${op}_${im}_${th}.x"
|
||||
exec_name="${exec_root}_${dt}${op}_${psize}_${im}_${suf}.x"
|
||||
|
||||
# Construct the name of the output file.
|
||||
out_file="${out_root}_${th}_${dt}${op}_${im}.m"
|
||||
out_file="${out_root}_${suf}_${dt}${op}_${im}.m"
|
||||
|
||||
echo "Running (nt = ${nt_use}) ./${exec_name} > ${out_file}"
|
||||
#echo "Running (nt = ${nt_use}) ./${exec_name} > ${out_file}"
|
||||
echo "Running ./${exec_name} > ${out_file}"
|
||||
|
||||
# Run executable.
|
||||
./${exec_name} > ${out_file}
|
||||
#./${exec_name} > ${out_file}
|
||||
|
||||
sleep 1
|
||||
sleep ${delay}
|
||||
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ int main( int argc, char** argv )
|
||||
obj_t alpha, beta;
|
||||
dim_t m, n, k;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
dim_t p_begin, p_max, p_inc;
|
||||
int m_input, n_input, k_input;
|
||||
ind_t ind;
|
||||
num_t dt;
|
||||
@@ -70,7 +70,7 @@ int main( int argc, char** argv )
|
||||
ind = IND;
|
||||
|
||||
p_begin = P_BEGIN;
|
||||
p_end = P_END;
|
||||
p_max = P_MAX;
|
||||
p_inc = P_INC;
|
||||
|
||||
m_input = -1;
|
||||
@@ -116,12 +116,9 @@ int main( int argc, char** argv )
|
||||
|
||||
// Begin with initializing the last entry to zero so that
|
||||
// matlab allocates space for the entire array once up-front.
|
||||
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
|
||||
#ifdef BLIS
|
||||
printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR );
|
||||
#else
|
||||
printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR );
|
||||
#endif
|
||||
for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ;
|
||||
|
||||
printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR );
|
||||
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )0,
|
||||
@@ -129,7 +126,7 @@ int main( int argc, char** argv )
|
||||
( unsigned long )0, 0.0 );
|
||||
|
||||
|
||||
for ( p = p_begin; p <= p_end; p += p_inc )
|
||||
for ( p = p_begin; p <= p_max; p += p_inc )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
|
||||
@@ -157,7 +154,6 @@ int main( int argc, char** argv )
|
||||
bli_setsc( (2.0/1.0), 0.0, &alpha );
|
||||
bli_setsc( (1.0/1.0), 0.0, &beta );
|
||||
|
||||
|
||||
bli_copym( &c, &c_save );
|
||||
|
||||
#if 0 //def BLIS
|
||||
@@ -173,7 +169,6 @@ int main( int argc, char** argv )
|
||||
|
||||
dtime = bli_clock();
|
||||
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
bli_printm( "b", &b, "%4.1f", "" );
|
||||
@@ -190,107 +185,106 @@ int main( int argc, char** argv )
|
||||
|
||||
#else
|
||||
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
float* bp = bli_obj_buffer( &b );
|
||||
float* betap = bli_obj_buffer( &beta );
|
||||
float* cp = bli_obj_buffer( &c );
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
float* bp = bli_obj_buffer( &b );
|
||||
float* betap = bli_obj_buffer( &beta );
|
||||
float* cp = bli_obj_buffer( &c );
|
||||
|
||||
sgemm_( &f77_transa,
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
double* bp = bli_obj_buffer( &b );
|
||||
double* betap = bli_obj_buffer( &beta );
|
||||
double* cp = bli_obj_buffer( &c );
|
||||
sgemm_( &f77_transa,
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
double* bp = bli_obj_buffer( &b );
|
||||
double* betap = bli_obj_buffer( &beta );
|
||||
double* cp = bli_obj_buffer( &c );
|
||||
|
||||
dgemm_( &f77_transa,
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
scomplex* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
scomplex* bp = bli_obj_buffer( &b );
|
||||
scomplex* betap = bli_obj_buffer( &beta );
|
||||
scomplex* cp = bli_obj_buffer( &c );
|
||||
dgemm_( &f77_transa,
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
scomplex* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
scomplex* bp = bli_obj_buffer( &b );
|
||||
scomplex* betap = bli_obj_buffer( &beta );
|
||||
scomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
cgemm_( &f77_transa,
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
dcomplex* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
dcomplex* bp = bli_obj_buffer( &b );
|
||||
dcomplex* betap = bli_obj_buffer( &beta );
|
||||
dcomplex* cp = bli_obj_buffer( &c );
|
||||
cgemm_( &f77_transa,
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
dcomplex* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
dcomplex* bp = bli_obj_buffer( &b );
|
||||
dcomplex* betap = bli_obj_buffer( &beta );
|
||||
dcomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
zgemm_( &f77_transa,
|
||||
//zgemm3m_( &f77_transa,
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
zgemm_( &f77_transa,
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef PRINT
|
||||
@@ -298,7 +292,6 @@ int main( int argc, char** argv )
|
||||
exit(1);
|
||||
#endif
|
||||
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
@@ -306,11 +299,7 @@ int main( int argc, char** argv )
|
||||
|
||||
if ( bli_is_complex( dt ) ) gflops *= 4.0;
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR );
|
||||
#else
|
||||
printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR );
|
||||
#endif
|
||||
printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR );
|
||||
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
|
||||
@@ -44,7 +44,7 @@ int main( int argc, char** argv )
|
||||
obj_t alpha, beta;
|
||||
dim_t m, n;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
dim_t p_begin, p_max, p_inc;
|
||||
int m_input, n_input;
|
||||
ind_t ind;
|
||||
num_t dt;
|
||||
@@ -70,7 +70,7 @@ int main( int argc, char** argv )
|
||||
ind = IND;
|
||||
|
||||
p_begin = P_BEGIN;
|
||||
p_end = P_END;
|
||||
p_max = P_MAX;
|
||||
p_inc = P_INC;
|
||||
|
||||
m_input = -1;
|
||||
@@ -115,19 +115,16 @@ int main( int argc, char** argv )
|
||||
|
||||
// Begin with initializing the last entry to zero so that
|
||||
// matlab allocates space for the entire array once up-front.
|
||||
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
|
||||
#ifdef BLIS
|
||||
printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR );
|
||||
#else
|
||||
printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR );
|
||||
#endif
|
||||
for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ;
|
||||
|
||||
printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR );
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )0,
|
||||
( unsigned long )0, 0.0 );
|
||||
|
||||
|
||||
for ( p = p_begin; p <= p_end; p += p_inc )
|
||||
for ( p = p_begin; p <= p_max; p += p_inc )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
|
||||
@@ -161,7 +158,6 @@ int main( int argc, char** argv )
|
||||
bli_setsc( (2.0/1.0), 0.0, &alpha );
|
||||
bli_setsc( (1.0/1.0), 0.0, &beta );
|
||||
|
||||
|
||||
bli_copym( &c, &c_save );
|
||||
|
||||
#if 0 //def BLIS
|
||||
@@ -177,7 +173,6 @@ int main( int argc, char** argv )
|
||||
|
||||
dtime = bli_clock();
|
||||
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
bli_printm( "b", &b, "%4.1f", "" );
|
||||
@@ -195,98 +190,98 @@ int main( int argc, char** argv )
|
||||
|
||||
#else
|
||||
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
float* bp = bli_obj_buffer( &b );
|
||||
float* betap = bli_obj_buffer( &beta );
|
||||
float* cp = bli_obj_buffer( &c );
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
float* bp = bli_obj_buffer( &b );
|
||||
float* betap = bli_obj_buffer( &beta );
|
||||
float* cp = bli_obj_buffer( &c );
|
||||
|
||||
ssymm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
double* bp = bli_obj_buffer( &b );
|
||||
double* betap = bli_obj_buffer( &beta );
|
||||
double* cp = bli_obj_buffer( &c );
|
||||
ssymm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
double* bp = bli_obj_buffer( &b );
|
||||
double* betap = bli_obj_buffer( &beta );
|
||||
double* cp = bli_obj_buffer( &c );
|
||||
|
||||
dsymm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
scomplex* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
scomplex* bp = bli_obj_buffer( &b );
|
||||
scomplex* betap = bli_obj_buffer( &beta );
|
||||
scomplex* cp = bli_obj_buffer( &c );
|
||||
dsymm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
scomplex* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
scomplex* bp = bli_obj_buffer( &b );
|
||||
scomplex* betap = bli_obj_buffer( &beta );
|
||||
scomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
chemm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
dcomplex* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
dcomplex* bp = bli_obj_buffer( &b );
|
||||
dcomplex* betap = bli_obj_buffer( &beta );
|
||||
dcomplex* cp = bli_obj_buffer( &c );
|
||||
chemm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
dcomplex* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
dcomplex* bp = bli_obj_buffer( &b );
|
||||
dcomplex* betap = bli_obj_buffer( &beta );
|
||||
dcomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
zhemm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
zhemm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef PRINT
|
||||
@@ -294,7 +289,6 @@ int main( int argc, char** argv )
|
||||
exit(1);
|
||||
#endif
|
||||
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
@@ -305,11 +299,7 @@ int main( int argc, char** argv )
|
||||
|
||||
if ( bli_is_complex( dt ) ) gflops *= 4.0;
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR );
|
||||
#else
|
||||
printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR );
|
||||
#endif
|
||||
printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR );
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
|
||||
@@ -46,7 +46,7 @@ int main( int argc, char** argv )
|
||||
obj_t alpha, beta;
|
||||
dim_t m, k;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
dim_t p_begin, p_max, p_inc;
|
||||
int m_input, k_input;
|
||||
ind_t ind;
|
||||
num_t dt, dt_real;
|
||||
@@ -73,7 +73,7 @@ int main( int argc, char** argv )
|
||||
ind = IND;
|
||||
|
||||
p_begin = P_BEGIN;
|
||||
p_end = P_END;
|
||||
p_max = P_MAX;
|
||||
p_inc = P_INC;
|
||||
|
||||
m_input = -1;
|
||||
@@ -118,19 +118,16 @@ int main( int argc, char** argv )
|
||||
|
||||
// Begin with initializing the last entry to zero so that
|
||||
// matlab allocates space for the entire array once up-front.
|
||||
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
|
||||
#ifdef BLIS
|
||||
printf( "data_%s_%cherk_%s_blis", THR_STR, dt_ch, STR );
|
||||
#else
|
||||
printf( "data_%s_%cherk_%s", THR_STR, dt_ch, STR );
|
||||
#endif
|
||||
for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ;
|
||||
|
||||
printf( "data_%s_%cherk_%s", THR_STR, dt_ch, STR );
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )0,
|
||||
( unsigned long )0, 0.0 );
|
||||
|
||||
|
||||
for ( p = p_begin; p <= p_end; p += p_inc )
|
||||
for ( p = p_begin; p <= p_max; p += p_inc )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
|
||||
@@ -162,7 +159,6 @@ int main( int argc, char** argv )
|
||||
bli_setsc( (2.0/1.0), 0.0, &alpha );
|
||||
bli_setsc( (1.0/1.0), 0.0, &beta );
|
||||
|
||||
|
||||
bli_copym( &c, &c_save );
|
||||
|
||||
#if 0 //def BLIS
|
||||
@@ -176,10 +172,8 @@ int main( int argc, char** argv )
|
||||
{
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
|
||||
dtime = bli_clock();
|
||||
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
bli_printm( "c", &c, "%4.1f", "" );
|
||||
@@ -194,86 +188,86 @@ int main( int argc, char** argv )
|
||||
|
||||
#else
|
||||
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
float* betap = bli_obj_buffer( &beta );
|
||||
float* cp = bli_obj_buffer( &c );
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
float* betap = bli_obj_buffer( &beta );
|
||||
float* cp = bli_obj_buffer( &c );
|
||||
|
||||
ssyrk_( &f77_uploc,
|
||||
&f77_transa,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
double* betap = bli_obj_buffer( &beta );
|
||||
double* cp = bli_obj_buffer( &c );
|
||||
ssyrk_( &f77_uploc,
|
||||
&f77_transa,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
double* betap = bli_obj_buffer( &beta );
|
||||
double* cp = bli_obj_buffer( &c );
|
||||
|
||||
dsyrk_( &f77_uploc,
|
||||
&f77_transa,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
float* betap = bli_obj_buffer( &beta );
|
||||
scomplex* cp = bli_obj_buffer( &c );
|
||||
dsyrk_( &f77_uploc,
|
||||
&f77_transa,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
float* betap = bli_obj_buffer( &beta );
|
||||
scomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
cherk_( &f77_uploc,
|
||||
&f77_transa,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
double* betap = bli_obj_buffer( &beta );
|
||||
dcomplex* cp = bli_obj_buffer( &c );
|
||||
cherk_( &f77_uploc,
|
||||
&f77_transa,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
double* betap = bli_obj_buffer( &beta );
|
||||
dcomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
zherk_( &f77_uploc,
|
||||
&f77_transa,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
zherk_( &f77_uploc,
|
||||
&f77_transa,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef PRINT
|
||||
@@ -281,7 +275,6 @@ int main( int argc, char** argv )
|
||||
exit(1);
|
||||
#endif
|
||||
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
@@ -289,11 +282,7 @@ int main( int argc, char** argv )
|
||||
|
||||
if ( bli_is_complex( dt ) ) gflops *= 4.0;
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_%s_%cherk_%s_blis", THR_STR, dt_ch, STR );
|
||||
#else
|
||||
printf( "data_%s_%cherk_%s", THR_STR, dt_ch, STR );
|
||||
#endif
|
||||
printf( "data_%s_%cherk_%s", THR_STR, dt_ch, STR );
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
|
||||
@@ -46,7 +46,7 @@ int main( int argc, char** argv )
|
||||
obj_t alpha;
|
||||
dim_t m, n;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
dim_t p_begin, p_max, p_inc;
|
||||
int m_input, n_input;
|
||||
ind_t ind;
|
||||
num_t dt;
|
||||
@@ -76,7 +76,7 @@ int main( int argc, char** argv )
|
||||
ind = IND;
|
||||
|
||||
p_begin = P_BEGIN;
|
||||
p_end = P_END;
|
||||
p_max = P_MAX;
|
||||
p_inc = P_INC;
|
||||
|
||||
m_input = -1;
|
||||
@@ -133,19 +133,16 @@ int main( int argc, char** argv )
|
||||
|
||||
// Begin with initializing the last entry to zero so that
|
||||
// matlab allocates space for the entire array once up-front.
|
||||
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
|
||||
#ifdef BLIS
|
||||
printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
|
||||
#else
|
||||
printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR );
|
||||
#endif
|
||||
for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ;
|
||||
|
||||
printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR );
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )0,
|
||||
( unsigned long )0, 0.0 );
|
||||
|
||||
|
||||
for ( p = p_begin; p <= p_end; p += p_inc )
|
||||
for ( p = p_begin; p <= p_max; p += p_inc )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
|
||||
@@ -188,10 +185,8 @@ int main( int argc, char** argv )
|
||||
{
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
|
||||
dtime = bli_clock();
|
||||
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
bli_printm( "c", &c, "%4.1f", "" );
|
||||
@@ -206,86 +201,86 @@ int main( int argc, char** argv )
|
||||
|
||||
#else
|
||||
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
float* cp = bli_obj_buffer( &c );
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
float* cp = bli_obj_buffer( &c );
|
||||
|
||||
strmm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
double* cp = bli_obj_buffer( &c );
|
||||
strmm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
double* cp = bli_obj_buffer( &c );
|
||||
|
||||
dtrmm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
scomplex* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
scomplex* cp = bli_obj_buffer( &c );
|
||||
dtrmm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
scomplex* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
scomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
ctrmm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
dcomplex* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
dcomplex* cp = bli_obj_buffer( &c );
|
||||
ctrmm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
dcomplex* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
dcomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
ztrmm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
ztrmm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef PRINT
|
||||
@@ -293,7 +288,6 @@ int main( int argc, char** argv )
|
||||
exit(1);
|
||||
#endif
|
||||
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
@@ -304,11 +298,7 @@ int main( int argc, char** argv )
|
||||
|
||||
if ( bli_is_complex( dt ) ) gflops *= 4.0;
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
|
||||
#else
|
||||
printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR );
|
||||
#endif
|
||||
printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR );
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
|
||||
@@ -46,7 +46,7 @@ int main( int argc, char** argv )
|
||||
obj_t alpha;
|
||||
dim_t m, n;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
dim_t p_begin, p_max, p_inc;
|
||||
int m_input, n_input;
|
||||
ind_t ind;
|
||||
num_t dt;
|
||||
@@ -76,7 +76,7 @@ int main( int argc, char** argv )
|
||||
ind = IND;
|
||||
|
||||
p_begin = P_BEGIN;
|
||||
p_end = P_END;
|
||||
p_max = P_MAX;
|
||||
p_inc = P_INC;
|
||||
|
||||
m_input = -1;
|
||||
@@ -133,19 +133,16 @@ int main( int argc, char** argv )
|
||||
|
||||
// Begin with initializing the last entry to zero so that
|
||||
// matlab allocates space for the entire array once up-front.
|
||||
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
|
||||
#ifdef BLIS
|
||||
printf( "data_%s_%ctrsm_%s_blis", THR_STR, dt_ch, STR );
|
||||
#else
|
||||
printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR );
|
||||
#endif
|
||||
for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ;
|
||||
|
||||
printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR );
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )0,
|
||||
( unsigned long )0, 0.0 );
|
||||
|
||||
|
||||
for ( p = p_begin; p <= p_end; p += p_inc )
|
||||
for ( p = p_begin; p <= p_max; p += p_inc )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
|
||||
@@ -192,10 +189,8 @@ int main( int argc, char** argv )
|
||||
{
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
|
||||
dtime = bli_clock();
|
||||
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
bli_printm( "c", &c, "%4.1f", "" );
|
||||
@@ -210,86 +205,86 @@ int main( int argc, char** argv )
|
||||
|
||||
#else
|
||||
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
float* cp = bli_obj_buffer( &c );
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
float* cp = bli_obj_buffer( &c );
|
||||
|
||||
strsm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
double* cp = bli_obj_buffer( &c );
|
||||
strsm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
double* cp = bli_obj_buffer( &c );
|
||||
|
||||
dtrsm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
scomplex* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
scomplex* cp = bli_obj_buffer( &c );
|
||||
dtrsm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
scomplex* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
scomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
ctrsm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
dcomplex* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
dcomplex* cp = bli_obj_buffer( &c );
|
||||
ctrsm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
dcomplex* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
dcomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
ztrsm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
ztrsm_( &f77_side,
|
||||
&f77_uploa,
|
||||
&f77_transa,
|
||||
&f77_diaga,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef PRINT
|
||||
@@ -297,7 +292,6 @@ int main( int argc, char** argv )
|
||||
exit(1);
|
||||
#endif
|
||||
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
@@ -308,11 +302,7 @@ int main( int argc, char** argv )
|
||||
|
||||
if ( bli_is_complex( dt ) ) gflops *= 4.0;
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_%s_%ctrsm_%s_blis", THR_STR, dt_ch, STR );
|
||||
#else
|
||||
printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR );
|
||||
#endif
|
||||
printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR );
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
|
||||
Reference in New Issue
Block a user