mirror of
https://github.com/amd/blis.git
synced 2026-03-21 17:57:23 +00:00
Tweaked zen2 subconfig's MC cache blocksizes.
Details:
- Updated the MC cache blocksizes registered by the 'zen2' subconfig.
- Minor updates to test/3/Makefile and test/3/runme.sh.
This commit is contained in:
@@ -113,7 +113,7 @@ void bli_cntx_init_zen2( cntx_t* cntx )
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 );
|
||||
#else
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 72, 36 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );
|
||||
#endif
|
||||
|
||||
@@ -136,14 +136,14 @@ PS_MAX := 2400
|
||||
PS_INC := 48
|
||||
|
||||
# Single-socket (multithreaded)
|
||||
P1_BEGIN := 96
|
||||
P1_MAX := 4800
|
||||
P1_INC := 96
|
||||
P1_BEGIN := 120
|
||||
P1_MAX := 6000
|
||||
P1_INC := 120
|
||||
|
||||
# Dual-socket (multithreaded)
|
||||
P2_BEGIN := 144
|
||||
P2_MAX := 7200
|
||||
P2_INC := 144
|
||||
P2_BEGIN := 160
|
||||
P2_MAX := 8000
|
||||
P2_INC := 160
|
||||
|
||||
|
||||
#
|
||||
@@ -231,9 +231,9 @@ vendor: vendor-st vendor-1s vendor-2s
|
||||
mkl: vendor
|
||||
armpl: vendor
|
||||
|
||||
all-st: blis-st openblas-st mkl-st
|
||||
all-1s: blis-1s openblas-1s mkl-1s
|
||||
all-2s: blis-2s openblas-2s mkl-2s
|
||||
all-st: blis-st openblas-st mkl-st eigen-st
|
||||
all-1s: blis-1s openblas-1s mkl-1s eigen-1s
|
||||
all-2s: blis-2s openblas-2s mkl-2s eigen-2s
|
||||
|
||||
blis-st: blis-nat-st
|
||||
blis-1s: blis-nat-1s
|
||||
|
||||
@@ -5,11 +5,12 @@ exec_root="test"
|
||||
out_root="output"
|
||||
delay=0.1
|
||||
|
||||
sys="blis"
|
||||
#sys="blis"
|
||||
#sys="stampede2"
|
||||
#sys="lonestar5"
|
||||
#sys="ul252"
|
||||
#sys="ul264"
|
||||
sys="ul2128"
|
||||
|
||||
# Bind threads to processors.
|
||||
#export OMP_PROC_BIND=true
|
||||
@@ -18,7 +19,7 @@ sys="blis"
|
||||
|
||||
if [ ${sys} = "blis" ]; then
|
||||
|
||||
export GOMP_CPU_AFFINITY="0 1 2 3"
|
||||
export GOMP_CPU_AFFINITY="0-3"
|
||||
|
||||
threads="jc1ic1jr1_2400
|
||||
jc2ic3jr2_6000
|
||||
@@ -35,7 +36,7 @@ elif [ ${sys} = "stampede2" ]; then
|
||||
|
||||
elif [ ${sys} = "lonestar5" ]; then
|
||||
|
||||
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23"
|
||||
export GOMP_CPU_AFFINITY="0-23"
|
||||
|
||||
# A hack to use libiomp5 with gcc.
|
||||
#export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/16.0.1.150/compilers_and_libraries_2016.1.150/linux/compiler/lib/intel64"
|
||||
@@ -47,7 +48,7 @@ elif [ ${sys} = "lonestar5" ]; then
|
||||
elif [ ${sys} = "ul252" ]; then
|
||||
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
|
||||
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51"
|
||||
export GOMP_CPU_AFFINITY="0-51"
|
||||
|
||||
threads="jc1ic1jr1_2400
|
||||
jc2ic13jr1_6000
|
||||
@@ -56,22 +57,39 @@ elif [ ${sys} = "ul252" ]; then
|
||||
elif [ ${sys} = "ul264" ]; then
|
||||
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
|
||||
export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63"
|
||||
export GOMP_CPU_AFFINITY="0-63"
|
||||
|
||||
threads="jc1ic1jr1_2400
|
||||
jc1ic8jr4_6000
|
||||
jc2ic8jr4_8000"
|
||||
|
||||
elif [ ${sys} = "ul2128" ]; then
|
||||
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
|
||||
export GOMP_CPU_AFFINITY="0-127"
|
||||
|
||||
threads="jc1ic1jr1_2400
|
||||
jc4ic4jr4_6000
|
||||
jc8ic4jr4_8000"
|
||||
#threads="jc4ic4jr4_6000
|
||||
# jc8ic4jr4_8000"
|
||||
#threads="jc1ic1jr1_2400"
|
||||
#threads="jc4ic4jr4_6000"
|
||||
#threads="jc8ic4jr4_8000"
|
||||
fi
|
||||
|
||||
# Datatypes to test.
|
||||
test_dts="d s z c"
|
||||
#test_dts="s"
|
||||
|
||||
# Operations to test.
|
||||
test_ops="gemm hemm herk trmm trsm"
|
||||
#test_ops="herk"
|
||||
|
||||
# Implementations to test.
|
||||
impls="blis"
|
||||
#impls="openblas"
|
||||
#impls="vendor"
|
||||
#impls="other"
|
||||
#impls="eigen"
|
||||
#impls="all"
|
||||
@@ -80,21 +98,24 @@ if [ "${impls}" = "blis" ]; then
|
||||
|
||||
test_impls="asm_blis"
|
||||
|
||||
elif [ "${impls}" = "openblas" ]; then
|
||||
|
||||
test_impls="openblas"
|
||||
|
||||
elif [ "${impls}" = "vendor" ]; then
|
||||
|
||||
test_impls="vendor"
|
||||
|
||||
elif [ "${impls}" = "eigen" ]; then
|
||||
|
||||
test_impls="eigen"
|
||||
|
||||
elif [ "${impls}" = "other" ]; then
|
||||
|
||||
test_impls="openblas vendor"
|
||||
|
||||
elif [ "${impls}" = "eigen" ]; then
|
||||
|
||||
test_impls="eigen"
|
||||
|
||||
test_impls="openblas vendor eigen"
|
||||
else
|
||||
|
||||
test_impls="openblas asm_blis vendor"
|
||||
test_impls="openblas asm_blis vendor eigen"
|
||||
fi
|
||||
|
||||
# Save a copy of GOMP_CPU_AFFINITY so that if we have to unset it, we can
|
||||
|
||||
Reference in New Issue
Block a user