Disabled experiment-related 1m code.

Details:
- Commented out code in frame/ind/oapi/bli_l3_3m4m1m_oapi.c that was
  specifically inserted to facilitate the benchmarking of 1m block-panel
  and panel-block algorithms.
- Updates to test/3m4m/Makefile, runme.sh script, and test_gemm.c to
  reflect changes used/needed during benchmarking.
This commit is contained in:
Field G. Van Zee
2017-02-21 15:33:39 -06:00
committed by prangana
parent 4f61528d56
commit e3eb01f6b9
4 changed files with 94 additions and 23 deletions

View File

@@ -65,11 +65,18 @@ void PASTEMAC(opname,imeth) \
\
/* A temporary hack to easily specify the 1m algorithm (block-panel or
panel-block). */ \
/*
if ( PASTEMAC(opname,imeth) == bli_gemm1m ) \
{ \
bli_gemm1mbp( alpha, a, b, beta, c ); \
return; \
} \
else if ( PASTEMAC(opname,imeth) == bli_gemm3m1 ) \
{ \
bli_gemm1mpb( alpha, a, b, beta, c ); \
return; \
} \
*/ \
\
/* Initialize a local context if the one provided is NULL. */ \
bli_cntx_init_local_if2( cname, imeth, dt, cntx, cntx_p ); \

View File

@@ -107,8 +107,9 @@ BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a
# BLAS library path(s). This is where the BLAS libraries reside.
HOME_LIB_PATH := $(HOME)/flame/lib
#MKL_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64
MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64
ICC_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64
#MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64
MKL_LIB_PATH := ${MKLROOT}/lib/intel64
#ICC_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64
ACML_LIB_PATH := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_int64/lib
ACMLP_LIB_PATH := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_mp_int64/lib
@@ -168,7 +169,7 @@ CFLAGS += -I$(BLIS_INC_PATH) -I$(TEST_SRC_PATH) #-I$(ACML_INC_PATH)
LINKER := $(CC)
LDFLAGS := #-L/home/00146/field/gnu/gcc-4.8.2/lib64
LDFLAGS += -lgfortran -lm -lpthread -fopenmp
LDFLAGS += -lgfortran -lm -lrt -lpthread -fopenmp
# Datatype
@@ -211,13 +212,13 @@ STR_ST := -DTHR_STR=\"st\"
STR_MT := -DTHR_STR=\"mt\"
# Problem size specification
PDEF_ST := -DP_BEGIN=100 \
-DP_END=1000 \
-DP_INC=100
PDEF_MT := -DP_BEGIN=100 \
PDEF_ST := -DP_BEGIN=40 \
-DP_END=2000 \
-DP_INC=100
-DP_INC=40
PDEF_MT := -DP_BEGIN=200 \
-DP_END=10000 \
-DP_INC=200
@@ -296,6 +297,8 @@ openblas-gemm-st: \
test_zgemm_openblas_st.x
openblas-gemm-mt: \
test_sgemm_openblas_mt.x \
test_dgemm_openblas_mt.x \
test_cgemm_openblas_mt.x \
test_zgemm_openblas_mt.x
@@ -306,6 +309,8 @@ mkl-gemm-st: \
test_zgemm_mkl_st.x
mkl-gemm-mt: \
test_sgemm_mkl_mt.x \
test_dgemm_mkl_mt.x \
test_cgemm_mkl_mt.x \
test_zgemm_mkl_mt.x
@@ -316,6 +321,8 @@ acml-gemm-st: \
test_zgemm_acml_st.x
acml-gemm-mt: \
test_sgemm_acml_mt.x \
test_dgemm_acml_mt.x \
test_cgemm_acml_mt.x \
test_zgemm_acml_mt.x
@@ -468,6 +475,12 @@ test_z%_openblas_st.o: test_%.c
test_c%_openblas_st.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@
test_d%_openblas_mt.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
test_s%_openblas_mt.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
test_z%_openblas_mt.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@
@@ -487,6 +500,12 @@ test_z%_mkl_st.o: test_%.c
test_c%_mkl_st.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@
test_d%_mkl_mt.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
test_s%_mkl_mt.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
test_z%_mkl_mt.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@
@@ -506,6 +525,12 @@ test_z%_acml_st.o: test_%.c
test_c%_acml_st.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_ACML) $(STR_ST) -c $< -o $@
test_d%_acml_mt.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_ACML) $(STR_MT) -c $< -o $@
test_s%_acml_mt.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_ACML) $(STR_MT) -c $< -o $@
test_z%_acml_mt.o: test_%.c
$(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_ACML) $(STR_MT) -c $< -o $@

View File

@@ -4,17 +4,21 @@
exec_root="test"
out_root="output"
sys="blis"
#sys="blis"
#sys="stampede"
sys="lonestar"
#sys="wahlberg"
# Bind threads to processors.
#export OMP_PROC_BIND=true
#export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 1 3 5 7 9 11 13 15"
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7"
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7"
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7"
#export GOMP_CPU_AFFINITY="0 2 4 6 1 3 5 7"
#export GOMP_CPU_AFFINITY="0 4 1 5 2 6 3 7"
#export GOMP_CPU_AFFINITY="0 1 4 5 8 9 12 13 16 17 20 21 24 25 28 29 32 33 36 37 40 41 44 45"
#export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 16 18 20 22 1 3 5 7 9 11 13 15 17 19 21 23"
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23"
# Modify LD_LIBRARY_PATH.
if [ ${sys} = "blis" ]; then
@@ -26,6 +30,11 @@ elif [ ${sys} = "stampede" ]; then
# A hack to use libiomp5 with gcc.
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64"
elif [ ${sys} = "lonestar" ]; then
# A hack to use libiomp5 with gcc.
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/16.0.1.150/compilers_and_libraries_2016.1.150/linux/compiler/lib/intel64"
elif [ ${sys} = "wahlberg" ]; then
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HOME/flame/lib/acml/5.3.1/gfortran64_int64/lib"
@@ -49,6 +58,14 @@ elif [ ${sys} = "stampede" ]; then
ir_nt=1 # 1st loop
nt=16
elif [ ${sys} = "lonestar" ]; then
jc_nt=2 # 5th loop
ic_nt=12 # 3rd loop
jr_nt=1 # 2nd loop
ir_nt=1 # 1st loop
nt=24
elif [ ${sys} = "wahlberg" ]; then
jc_nt=1 # 5th loop
@@ -59,8 +76,10 @@ elif [ ${sys} = "wahlberg" ]; then
fi
# Threadedness to test.
threads="st mt" # st mt"
threads_r="st mt" # mt"
#threads="mt"
#threads_r="mt"
threads="st"
threads_r="st"
# Datatypes to test.
dts="z c"
@@ -82,6 +101,14 @@ elif [ ${sys} = "stampede" ]; then
test_impls="openblas mkl asm_blis 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis 1m_blis"
#test_impls="openblas mkl asm_blis"
elif [ ${sys} = "lonestar" ]; then
test_impls="asm_blis 4mhw_blis 4m1a_blis 1m_blis 3m1_blis"
#test_impls="1m_blis 3m1_blis"
#test_impls="4m1a_blis"
#test_impls="mkl"
#test_impls="openblas mkl asm_blis"
elif [ ${sys} = "wahlberg" ]; then
test_impls="openblas acml asm_blis 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis 1m_blis"
@@ -90,7 +117,8 @@ fi
# Real domain implementations to test.
#test_impls_r="openblas mkl asm_blis"
test_impls_r="openblas asm_blis"
test_impls_r="asm_blis"
#test_impls_r=""
# First perform real test cases.
for th in ${threads_r}; do
@@ -112,10 +140,11 @@ for th in ${threads_r}; do
# Unset GOMP_CPU_AFFINITY for MKL when using mkl_intel_thread.
#if [ ${im} = "mkl" ]; then
#
# export GOMP_CPU_AFFINITY=""
# export MKL_NUM_THREADS=${nt}
#else
# export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"
# export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23"
#fi
else
@@ -124,7 +153,6 @@ for th in ${threads_r}; do
export BLIS_JR_NT=1
export BLIS_IR_NT=1
export OMP_NUM_THREADS=1
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"
fi
# Construct the name of the test executable.
@@ -165,10 +193,10 @@ for th in ${threads}; do
# Unset GOMP_CPU_AFFINITY for MKL when using mkl_intel_thread.
#if [ ${im} = "mkl" ]; then
#
# export GOMP_CPU_AFFINITY=""
#else
# export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"
# export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23"
#fi
else
@@ -177,7 +205,6 @@ for th in ${threads}; do
export BLIS_JR_NT=1
export BLIS_IR_NT=1
export OMP_NUM_THREADS=1
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"
fi
# Construct the name of the test executable.

View File

@@ -49,6 +49,7 @@ int main( int argc, char** argv )
dim_t p;
dim_t p_begin, p_end, p_inc;
int m_input, n_input, k_input;
ind_t ind;
num_t dt;
char dt_ch;
int r, n_repeats;
@@ -70,6 +71,8 @@ int main( int argc, char** argv )
dt = DT;
ind = IND;
p_begin = P_BEGIN;
p_end = P_END;
p_inc = P_INC;
@@ -78,12 +81,21 @@ int main( int argc, char** argv )
n_input = -1;
k_input = -1;
#if 0
// Suppress compiler warnings about unused variable 'ind'.
( void )ind;
#if 1
cntx_t cntx;
ind_t ind_mod = ind;
// A hack to use 3m1 as 1mpb (with 1m as 1mbp).
if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M;
// Initialize a context for the current induced method and datatype.
bli_gemmind_cntx_init( IND, dt, &cntx );
bli_gemmind_cntx_init( ind_mod, dt, &cntx );
// Set k to the kc blocksize for the current datatype.
k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, &cntx );
@@ -157,7 +169,7 @@ int main( int argc, char** argv )
#ifdef BLIS
bli_ind_disable_all_dt( dt );
bli_ind_enable_dt( IND, dt );
bli_ind_enable_dt( ind, dt );
#endif
dtime_save = DBL_MAX;