mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Disabled experiment-related 1m code.
Details: - Commented out code in frame/ind/oapi/bli_l3_3m4m1m_oapi.c that had been inserted specifically to facilitate benchmarking of the 1m block-panel and panel-block algorithms. - Updated test/3m4m/Makefile, the runme.sh script, and test_gemm.c to reflect changes used/needed during benchmarking.
This commit is contained in:
committed by
prangana
parent
4f61528d56
commit
e3eb01f6b9
@@ -65,11 +65,18 @@ void PASTEMAC(opname,imeth) \
|
||||
\
|
||||
/* A temporary hack to easily specify the 1m algorithm (block-panel or
|
||||
panel-block). */ \
|
||||
/*
|
||||
if ( PASTEMAC(opname,imeth) == bli_gemm1m ) \
|
||||
{ \
|
||||
bli_gemm1mbp( alpha, a, b, beta, c ); \
|
||||
return; \
|
||||
} \
|
||||
else if ( PASTEMAC(opname,imeth) == bli_gemm3m1 ) \
|
||||
{ \
|
||||
bli_gemm1mpb( alpha, a, b, beta, c ); \
|
||||
return; \
|
||||
} \
|
||||
*/ \
|
||||
\
|
||||
/* Initialize a local context if the one provided is NULL. */ \
|
||||
bli_cntx_init_local_if2( cname, imeth, dt, cntx, cntx_p ); \
|
||||
|
||||
@@ -107,8 +107,9 @@ BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a
|
||||
# BLAS library path(s). This is where the BLAS libraries reside.
|
||||
HOME_LIB_PATH := $(HOME)/flame/lib
|
||||
#MKL_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64
|
||||
MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64
|
||||
ICC_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64
|
||||
#MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64
|
||||
MKL_LIB_PATH := ${MKLROOT}/lib/intel64
|
||||
#ICC_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64
|
||||
ACML_LIB_PATH := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_int64/lib
|
||||
ACMLP_LIB_PATH := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_mp_int64/lib
|
||||
|
||||
@@ -168,7 +169,7 @@ CFLAGS += -I$(BLIS_INC_PATH) -I$(TEST_SRC_PATH) #-I$(ACML_INC_PATH)
|
||||
|
||||
LINKER := $(CC)
|
||||
LDFLAGS := #-L/home/00146/field/gnu/gcc-4.8.2/lib64
|
||||
LDFLAGS += -lgfortran -lm -lpthread -fopenmp
|
||||
LDFLAGS += -lgfortran -lm -lrt -lpthread -fopenmp
|
||||
|
||||
|
||||
# Datatype
|
||||
@@ -211,13 +212,13 @@ STR_ST := -DTHR_STR=\"st\"
|
||||
STR_MT := -DTHR_STR=\"mt\"
|
||||
|
||||
# Problem size specification
|
||||
PDEF_ST := -DP_BEGIN=100 \
|
||||
-DP_END=1000 \
|
||||
-DP_INC=100
|
||||
|
||||
PDEF_MT := -DP_BEGIN=100 \
|
||||
PDEF_ST := -DP_BEGIN=40 \
|
||||
-DP_END=2000 \
|
||||
-DP_INC=100
|
||||
-DP_INC=40
|
||||
|
||||
PDEF_MT := -DP_BEGIN=200 \
|
||||
-DP_END=10000 \
|
||||
-DP_INC=200
|
||||
|
||||
|
||||
|
||||
@@ -296,6 +297,8 @@ openblas-gemm-st: \
|
||||
test_zgemm_openblas_st.x
|
||||
|
||||
openblas-gemm-mt: \
|
||||
test_sgemm_openblas_mt.x \
|
||||
test_dgemm_openblas_mt.x \
|
||||
test_cgemm_openblas_mt.x \
|
||||
test_zgemm_openblas_mt.x
|
||||
|
||||
@@ -306,6 +309,8 @@ mkl-gemm-st: \
|
||||
test_zgemm_mkl_st.x
|
||||
|
||||
mkl-gemm-mt: \
|
||||
test_sgemm_mkl_mt.x \
|
||||
test_dgemm_mkl_mt.x \
|
||||
test_cgemm_mkl_mt.x \
|
||||
test_zgemm_mkl_mt.x
|
||||
|
||||
@@ -316,6 +321,8 @@ acml-gemm-st: \
|
||||
test_zgemm_acml_st.x
|
||||
|
||||
acml-gemm-mt: \
|
||||
test_sgemm_acml_mt.x \
|
||||
test_dgemm_acml_mt.x \
|
||||
test_cgemm_acml_mt.x \
|
||||
test_zgemm_acml_mt.x
|
||||
|
||||
@@ -468,6 +475,12 @@ test_z%_openblas_st.o: test_%.c
|
||||
test_c%_openblas_st.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_d%_openblas_mt.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_s%_openblas_mt.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_z%_openblas_mt.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
|
||||
|
||||
@@ -487,6 +500,12 @@ test_z%_mkl_st.o: test_%.c
|
||||
test_c%_mkl_st.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_d%_mkl_mt.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_s%_mkl_mt.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_z%_mkl_mt.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
|
||||
|
||||
@@ -506,6 +525,12 @@ test_z%_acml_st.o: test_%.c
|
||||
test_c%_acml_st.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_ACML) $(STR_ST) -c $< -o $@
|
||||
|
||||
test_d%_acml_mt.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_ACML) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_s%_acml_mt.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_ACML) $(STR_MT) -c $< -o $@
|
||||
|
||||
test_z%_acml_mt.o: test_%.c
|
||||
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_ACML) $(STR_MT) -c $< -o $@
|
||||
|
||||
|
||||
@@ -4,17 +4,21 @@
|
||||
exec_root="test"
|
||||
out_root="output"
|
||||
|
||||
sys="blis"
|
||||
#sys="blis"
|
||||
#sys="stampede"
|
||||
sys="lonestar"
|
||||
#sys="wahlberg"
|
||||
|
||||
# Bind threads to processors.
|
||||
#export OMP_PROC_BIND=true
|
||||
#export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 1 3 5 7 9 11 13 15"
|
||||
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7"
|
||||
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7"
|
||||
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7"
|
||||
#export GOMP_CPU_AFFINITY="0 2 4 6 1 3 5 7"
|
||||
#export GOMP_CPU_AFFINITY="0 4 1 5 2 6 3 7"
|
||||
#export GOMP_CPU_AFFINITY="0 1 4 5 8 9 12 13 16 17 20 21 24 25 28 29 32 33 36 37 40 41 44 45"
|
||||
#export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 16 18 20 22 1 3 5 7 9 11 13 15 17 19 21 23"
|
||||
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23"
|
||||
|
||||
# Modify LD_LIBRARY_PATH.
|
||||
if [ ${sys} = "blis" ]; then
|
||||
@@ -26,6 +30,11 @@ elif [ ${sys} = "stampede" ]; then
|
||||
# A hack to use libiomp5 with gcc.
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64"
|
||||
|
||||
elif [ ${sys} = "lonestar" ]; then
|
||||
|
||||
# A hack to use libiomp5 with gcc.
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/16.0.1.150/compilers_and_libraries_2016.1.150/linux/compiler/lib/intel64"
|
||||
|
||||
elif [ ${sys} = "wahlberg" ]; then
|
||||
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HOME/flame/lib/acml/5.3.1/gfortran64_int64/lib"
|
||||
@@ -49,6 +58,14 @@ elif [ ${sys} = "stampede" ]; then
|
||||
ir_nt=1 # 1st loop
|
||||
nt=16
|
||||
|
||||
elif [ ${sys} = "lonestar" ]; then
|
||||
|
||||
jc_nt=2 # 5th loop
|
||||
ic_nt=12 # 3rd loop
|
||||
jr_nt=1 # 2nd loop
|
||||
ir_nt=1 # 1st loop
|
||||
nt=24
|
||||
|
||||
elif [ ${sys} = "wahlberg" ]; then
|
||||
|
||||
jc_nt=1 # 5th loop
|
||||
@@ -59,8 +76,10 @@ elif [ ${sys} = "wahlberg" ]; then
|
||||
fi
|
||||
|
||||
# Threadedness to test.
|
||||
threads="st mt" # st mt"
|
||||
threads_r="st mt" # mt"
|
||||
#threads="mt"
|
||||
#threads_r="mt"
|
||||
threads="st"
|
||||
threads_r="st"
|
||||
|
||||
# Datatypes to test.
|
||||
dts="z c"
|
||||
@@ -82,6 +101,14 @@ elif [ ${sys} = "stampede" ]; then
|
||||
test_impls="openblas mkl asm_blis 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis 1m_blis"
|
||||
#test_impls="openblas mkl asm_blis"
|
||||
|
||||
elif [ ${sys} = "lonestar" ]; then
|
||||
|
||||
test_impls="asm_blis 4mhw_blis 4m1a_blis 1m_blis 3m1_blis"
|
||||
#test_impls="1m_blis 3m1_blis"
|
||||
#test_impls="4m1a_blis"
|
||||
#test_impls="mkl"
|
||||
#test_impls="openblas mkl asm_blis"
|
||||
|
||||
elif [ ${sys} = "wahlberg" ]; then
|
||||
|
||||
test_impls="openblas acml asm_blis 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis 1m_blis"
|
||||
@@ -90,7 +117,8 @@ fi
|
||||
|
||||
# Real domain implementations to test.
|
||||
#test_impls_r="openblas mkl asm_blis"
|
||||
test_impls_r="openblas asm_blis"
|
||||
test_impls_r="asm_blis"
|
||||
#test_impls_r=""
|
||||
|
||||
# First perform real test cases.
|
||||
for th in ${threads_r}; do
|
||||
@@ -112,10 +140,11 @@ for th in ${threads_r}; do
|
||||
|
||||
# Unset GOMP_CPU_AFFINITY for MKL when using mkl_intel_thread.
|
||||
#if [ ${im} = "mkl" ]; then
|
||||
#
|
||||
|
||||
# export GOMP_CPU_AFFINITY=""
|
||||
# export MKL_NUM_THREADS=${nt}
|
||||
#else
|
||||
# export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"
|
||||
# export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23"
|
||||
#fi
|
||||
else
|
||||
|
||||
@@ -124,7 +153,6 @@ for th in ${threads_r}; do
|
||||
export BLIS_JR_NT=1
|
||||
export BLIS_IR_NT=1
|
||||
export OMP_NUM_THREADS=1
|
||||
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"
|
||||
fi
|
||||
|
||||
# Construct the name of the test executable.
|
||||
@@ -165,10 +193,10 @@ for th in ${threads}; do
|
||||
|
||||
# Unset GOMP_CPU_AFFINITY for MKL when using mkl_intel_thread.
|
||||
#if [ ${im} = "mkl" ]; then
|
||||
#
|
||||
|
||||
# export GOMP_CPU_AFFINITY=""
|
||||
#else
|
||||
# export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"
|
||||
# export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23"
|
||||
#fi
|
||||
else
|
||||
|
||||
@@ -177,7 +205,6 @@ for th in ${threads}; do
|
||||
export BLIS_JR_NT=1
|
||||
export BLIS_IR_NT=1
|
||||
export OMP_NUM_THREADS=1
|
||||
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"
|
||||
fi
|
||||
|
||||
# Construct the name of the test executable.
|
||||
|
||||
@@ -49,6 +49,7 @@ int main( int argc, char** argv )
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
int m_input, n_input, k_input;
|
||||
ind_t ind;
|
||||
num_t dt;
|
||||
char dt_ch;
|
||||
int r, n_repeats;
|
||||
@@ -70,6 +71,8 @@ int main( int argc, char** argv )
|
||||
|
||||
dt = DT;
|
||||
|
||||
ind = IND;
|
||||
|
||||
p_begin = P_BEGIN;
|
||||
p_end = P_END;
|
||||
p_inc = P_INC;
|
||||
@@ -78,12 +81,21 @@ int main( int argc, char** argv )
|
||||
n_input = -1;
|
||||
k_input = -1;
|
||||
|
||||
#if 0
|
||||
|
||||
// Suppress compiler warnings about unused variable 'ind'.
|
||||
( void )ind;
|
||||
|
||||
#if 1
|
||||
|
||||
cntx_t cntx;
|
||||
|
||||
ind_t ind_mod = ind;
|
||||
|
||||
// A hack to use 3m1 as 1mpb (with 1m as 1mbp).
|
||||
if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M;
|
||||
|
||||
// Initialize a context for the current induced method and datatype.
|
||||
bli_gemmind_cntx_init( IND, dt, &cntx );
|
||||
bli_gemmind_cntx_init( ind_mod, dt, &cntx );
|
||||
|
||||
// Set k to the kc blocksize for the current datatype.
|
||||
k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, &cntx );
|
||||
@@ -157,7 +169,7 @@ int main( int argc, char** argv )
|
||||
|
||||
#ifdef BLIS
|
||||
bli_ind_disable_all_dt( dt );
|
||||
bli_ind_enable_dt( IND, dt );
|
||||
bli_ind_enable_dt( ind, dt );
|
||||
#endif
|
||||
|
||||
dtime_save = DBL_MAX;
|
||||
|
||||
Reference in New Issue
Block a user