Files
blis/test/3/runme.sh
Field G. Van Zee 92fb9c87bf Add more support for Eigen to drivers in test/3.
Details:
- Use compile-time implementations of Eigen in test_gemm.c via new
  EIGEN cpp macro, defined on command line. (Linking to Eigen's BLAS
  library is not necessary.) However, as of Eigen 3.3.7, Eigen only
  parallelizes the gemm operation and not hemm, herk, trmm, trsm, or
  any other level-3 operation.
- Fixed a bug in trmm and trsm drivers whereby the wrong function
  (bli_does_trans()) was being called to determine whether the object
  for matrix A should be created for a left- or right-side case. This
  was corrected by changing the function to bli_is_left(), as is done
  in the hemm driver.
- Added support for running Eigen test drivers from runme.sh.
2019-03-26 15:43:23 -05:00

231 lines
6.3 KiB
Bash
Executable File

#!/bin/bash
# File pefixes.
exec_root="test"
out_root="output"
delay=0.1
sys="blis"
#sys="stampede2"
#sys="lonestar5"
#sys="ul252"
#sys="ul264"
# Bind threads to processors.
#export OMP_PROC_BIND=true
#export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 16 18 20 22 1 3 5 7 9 11 13 15 17 19 21 23"
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103"
if [ ${sys} = "blis" ]; then
export GOMP_CPU_AFFINITY="0 1 2 3"
threads="jc1ic1jr1_2400
jc2ic3jr2_6000
jc4ic3jr2_8000"
elif [ ${sys} = "stampede2" ]; then
echo "Need to set GOMP_CPU_AFFINITY."
exit 1
threads="jc1ic1jr1_2400
jc4ic6jr1_6000
jc4ic12jr1_8000"
elif [ ${sys} = "lonestar5" ]; then
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23"
# A hack to use libiomp5 with gcc.
#export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/16.0.1.150/compilers_and_libraries_2016.1.150/linux/compiler/lib/intel64"
threads="jc1ic1jr1_2400
jc2ic3jr2_6000
jc4ic3jr2_8000"
elif [ ${sys} = "ul252" ]; then
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51"
threads="jc1ic1jr1_2400
jc2ic13jr1_6000
jc4ic13jr1_8000"
elif [ ${sys} = "ul264" ]; then
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63"
threads="jc1ic1jr1_2400
jc1ic8jr4_6000
jc2ic8jr4_8000"
fi
# Datatypes to test.
test_dts="d s z c"
# Operations to test.
test_ops="gemm hemm herk trmm trsm"
# Implementations to test.
#impls="blis"
#impls="other"
impls="eigen"
#impls="all"
if [ "${impls}" = "blis" ]; then
test_impls="asm_blis"
elif [ "${impls}" = "eigen" ]; then
test_impls="eigen"
elif [ "${impls}" = "other" ]; then
test_impls="openblas vendor"
elif [ "${impls}" = "eigen" ]; then
test_impls="eigen"
else
test_impls="openblas asm_blis vendor"
fi
# Save a copy of GOMP_CPU_AFFINITY so that if we have to unset it, we can
# restore the value.
GOMP_CPU_AFFINITYsave=${GOMP_CPU_AFFINITY}
# First perform real test cases.
for th in ${threads}; do
# Start with one way of parallelism in each loop. We will now begin
# parsing the 'th' variable to update one or more of these threading
# parameters.
jc_nt=1; pc_nt=1; ic_nt=1; jr_nt=1; ir_nt=1
# Strip everything before and after the underscore so that what remains
# is the problem size and threading parameter string, respectively.
psize=${th##*_}; thinfo=${th%%_*}
# Identify each threading parameter and insert a space before it.
thsep=$(echo -e ${thinfo} | sed -e "s/\([jip][cr]\)/ \1/g" )
nt=1
for loopnum in ${thsep}; do
# Given the current string, which identifies a loop and the
# number of ways of parallelism for that loop, strip out
# the ways and loop separately to identify each.
loop=$(echo -e ${loopnum} | sed -e "s/[0-9]//g" )
num=$(echo -e ${loopnum} | sed -e "s/[a-z]//g" )
# Construct a string that we can evaluate to set the number
# of ways of parallelism for the current loop.
loop_nt_eq_num="${loop}_nt=${num}"
# Update the total number of threads.
nt=$(expr ${nt} \* ${num})
# Evaluate the string to assign the ways to the variable.
eval ${loop_nt_eq_num}
done
echo "Switching to: jc${jc_nt} pc${pc_nt} ic${ic_nt} jr${jr_nt} ir${ir_nt} (nt = ${nt}) p_max${psize}"
for dt in ${test_dts}; do
for im in ${test_impls}; do
for op in ${test_ops}; do
# Eigen does not support multithreading for hemm, herk, trmm,
# or trsm. So if we're getting ready to execute an Eigen driver
# for one of these operations and nt > 1, we skip this test.
if [ "${im}" = "eigen" ] && \
[ "${op}" != "gemm" ] && \
[ "${nt}" != "1" ]; then
continue;
fi
# Find the threading suffix by probing the executable.
binname=$(ls ${exec_root}_${dt}${op}_${psize}_${im}_*.x)
suf_ext=${binname##*_}
suf=${suf_ext%%.*}
#echo "found file: ${binname} with suffix ${suf}"
# Set the number of threads according to th.
if [ "${suf}" = "1s" ] || [ "${suf}" = "2s" ]; then
# Set the threading parameters based on the implementation
# that we are preparing to run.
if [ "${im}" = "asm_blis" ]; then
unset OMP_NUM_THREADS
export BLIS_JC_NT=${jc_nt}
export BLIS_PC_NT=${pc_nt}
export BLIS_IC_NT=${ic_nt}
export BLIS_JR_NT=${jr_nt}
export BLIS_IR_NT=${ir_nt}
elif [ "${im}" = "openblas" ]; then
unset OMP_NUM_THREADS
export OPENBLAS_NUM_THREADS=${nt}
elif [ "${im}" = "eigen" ]; then
export OMP_NUM_THREADS=${nt}
elif [ "${im}" = "vendor" ]; then
unset OMP_NUM_THREADS
export MKL_NUM_THREADS=${nt}
fi
export nt_use=${nt}
# Multithreaded OpenBLAS seems to have a problem running
# properly if GOMP_CPU_AFFINITY is set. So we temporarily
# unset it here if we are about to execute OpenBLAS, but
# otherwise restore it.
if [ ${im} = "openblas" ]; then
unset GOMP_CPU_AFFINITY
else
export GOMP_CPU_AFFINITY="${GOMP_CPU_AFFINITYsave}"
fi
else
export BLIS_JC_NT=1
export BLIS_PC_NT=1
export BLIS_IC_NT=1
export BLIS_JR_NT=1
export BLIS_IR_NT=1
export OMP_NUM_THREADS=1
export OPENBLAS_NUM_THREADS=1
export MKL_NUM_THREADS=1
export nt_use=1
fi
# Construct the name of the test executable.
exec_name="${exec_root}_${dt}${op}_${psize}_${im}_${suf}.x"
# Construct the name of the output file.
out_file="${out_root}_${suf}_${dt}${op}_${im}.m"
#echo "Running (nt = ${nt_use}) ./${exec_name} > ${out_file}"
echo "Running ./${exec_name} > ${out_file}"
# Run executable.
./${exec_name} > ${out_file}
sleep ${delay}
done
done
done
done