Tweaked zen2 subconfig's MC cache blocksizes.

Details:
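- Updated the MC cache blocksizes registered by the 'zen2' subconfig: in the #else branch of bli_cntx_init_zen2(), the last two BLIS_MC values change from 144, 72 to 72, 36 (the first two, 144 and 72, are unchanged).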
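- Minor updates to test/3/Makefile and test/3/runme.sh: enlarged the P1_* and P2_* problem-size ranges, added eigen to the all-st/all-1s/all-2s targets, added a "ul2128" system entry (now the selected one), switched GOMP_CPU_AFFINITY values to range notation (e.g. "0-63"), and added "openblas" and "vendor" as selectable implementations.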
This commit is contained in:
Field G. Van Zee
2020-09-28 23:39:05 +00:00
parent 5efcdeffd5
commit 4fd8d9fec2
3 changed files with 43 additions and 22 deletions

View File

@@ -113,7 +113,7 @@ void bli_cntx_init_zen2( cntx_t* cntx )
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 );
#else #else
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 72, 36 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );
#endif #endif

View File

@@ -136,14 +136,14 @@ PS_MAX := 2400
PS_INC := 48 PS_INC := 48
# Single-socket (multithreaded) # Single-socket (multithreaded)
P1_BEGIN := 96 P1_BEGIN := 120
P1_MAX := 4800 P1_MAX := 6000
P1_INC := 96 P1_INC := 120
# Dual-socket (multithreaded) # Dual-socket (multithreaded)
P2_BEGIN := 144 P2_BEGIN := 160
P2_MAX := 7200 P2_MAX := 8000
P2_INC := 144 P2_INC := 160
# #
@@ -231,9 +231,9 @@ vendor: vendor-st vendor-1s vendor-2s
mkl: vendor mkl: vendor
armpl: vendor armpl: vendor
all-st: blis-st openblas-st mkl-st all-st: blis-st openblas-st mkl-st eigen-st
all-1s: blis-1s openblas-1s mkl-1s all-1s: blis-1s openblas-1s mkl-1s eigen-1s
all-2s: blis-2s openblas-2s mkl-2s all-2s: blis-2s openblas-2s mkl-2s eigen-2s
blis-st: blis-nat-st blis-st: blis-nat-st
blis-1s: blis-nat-1s blis-1s: blis-nat-1s

View File

@@ -5,11 +5,12 @@ exec_root="test"
out_root="output" out_root="output"
delay=0.1 delay=0.1
sys="blis" #sys="blis"
#sys="stampede2" #sys="stampede2"
#sys="lonestar5" #sys="lonestar5"
#sys="ul252" #sys="ul252"
#sys="ul264" #sys="ul264"
sys="ul2128"
# Bind threads to processors. # Bind threads to processors.
#export OMP_PROC_BIND=true #export OMP_PROC_BIND=true
@@ -18,7 +19,7 @@ sys="blis"
if [ ${sys} = "blis" ]; then if [ ${sys} = "blis" ]; then
export GOMP_CPU_AFFINITY="0 1 2 3" export GOMP_CPU_AFFINITY="0-3"
threads="jc1ic1jr1_2400 threads="jc1ic1jr1_2400
jc2ic3jr2_6000 jc2ic3jr2_6000
@@ -35,7 +36,7 @@ elif [ ${sys} = "stampede2" ]; then
elif [ ${sys} = "lonestar5" ]; then elif [ ${sys} = "lonestar5" ]; then
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23" export GOMP_CPU_AFFINITY="0-23"
# A hack to use libiomp5 with gcc. # A hack to use libiomp5 with gcc.
#export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/16.0.1.150/compilers_and_libraries_2016.1.150/linux/compiler/lib/intel64" #export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/16.0.1.150/compilers_and_libraries_2016.1.150/linux/compiler/lib/intel64"
@@ -47,7 +48,7 @@ elif [ ${sys} = "lonestar5" ]; then
elif [ ${sys} = "ul252" ]; then elif [ ${sys} = "ul252" ]; then
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51" export GOMP_CPU_AFFINITY="0-51"
threads="jc1ic1jr1_2400 threads="jc1ic1jr1_2400
jc2ic13jr1_6000 jc2ic13jr1_6000
@@ -56,22 +57,39 @@ elif [ ${sys} = "ul252" ]; then
elif [ ${sys} = "ul264" ]; then elif [ ${sys} = "ul264" ]; then
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63" export GOMP_CPU_AFFINITY="0-63"
threads="jc1ic1jr1_2400 threads="jc1ic1jr1_2400
jc1ic8jr4_6000 jc1ic8jr4_6000
jc2ic8jr4_8000" jc2ic8jr4_8000"
elif [ ${sys} = "ul2128" ]; then
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
export GOMP_CPU_AFFINITY="0-127"
threads="jc1ic1jr1_2400
jc4ic4jr4_6000
jc8ic4jr4_8000"
#threads="jc4ic4jr4_6000
# jc8ic4jr4_8000"
#threads="jc1ic1jr1_2400"
#threads="jc4ic4jr4_6000"
#threads="jc8ic4jr4_8000"
fi fi
# Datatypes to test. # Datatypes to test.
test_dts="d s z c" test_dts="d s z c"
#test_dts="s"
# Operations to test. # Operations to test.
test_ops="gemm hemm herk trmm trsm" test_ops="gemm hemm herk trmm trsm"
#test_ops="herk"
# Implementations to test. # Implementations to test.
impls="blis" impls="blis"
#impls="openblas"
#impls="vendor"
#impls="other" #impls="other"
#impls="eigen" #impls="eigen"
#impls="all" #impls="all"
@@ -80,21 +98,24 @@ if [ "${impls}" = "blis" ]; then
test_impls="asm_blis" test_impls="asm_blis"
elif [ "${impls}" = "openblas" ]; then
test_impls="openblas"
elif [ "${impls}" = "vendor" ]; then
test_impls="vendor"
elif [ "${impls}" = "eigen" ]; then elif [ "${impls}" = "eigen" ]; then
test_impls="eigen" test_impls="eigen"
elif [ "${impls}" = "other" ]; then elif [ "${impls}" = "other" ]; then
test_impls="openblas vendor" test_impls="openblas vendor eigen"
elif [ "${impls}" = "eigen" ]; then
test_impls="eigen"
else else
test_impls="openblas asm_blis vendor" test_impls="openblas asm_blis vendor eigen"
fi fi
# Save a copy of GOMP_CPU_AFFINITY so that if we have to unset it, we can # Save a copy of GOMP_CPU_AFFINITY so that if we have to unset it, we can