Register l3 sup kernels in zen2 subconfig.

Details:
- Registered full suite of sgemm and dgemm sup millikernels, blocksizes,
  and crossover thresholds in bli_cntx_init_zen2.c.
- Minor updates to test/sup/runme.sh for running on Zen2 Epyc 7742
  system.
This commit is contained in:
Field G. Van Zee
2020-10-09 20:22:17 +00:00
parent d98368c32d
commit a0849d390d
2 changed files with 104 additions and 4 deletions

View File

@@ -38,6 +38,7 @@
void bli_cntx_init_zen2( cntx_t* cntx )
{
blksz_t blkszs[ BLIS_NUM_BLKSZS ];
blksz_t thresh[ BLIS_NUM_THRESH ];
// Set default kernel blocksizes and functions.
bli_cntx_init_zen2_ref( cntx );
@@ -137,5 +138,87 @@ void bli_cntx_init_zen2( cntx_t* cntx )
BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF,
cntx
);
// -------------------------------------------------------------------------
// Initialize sup thresholds with architecture-appropriate values. Each call
// below fills one entry of thresh[] (MT, NT, or KT) with four values, one
// per datatype column in the s/d/c/z header.
// NOTE(review): the -1 values in the c and z columns presumably mark those
// datatypes as having no threshold here -- confirm against
// bli_blksz_init_easy().
// s d c z
#if 1
// Active branch: 500 (s) and 249 (d) for each of MT, NT, and KT.
bli_blksz_init_easy( &thresh[ BLIS_MT ], 500, 249, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_NT ], 500, 249, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_KT ], 500, 249, -1, -1 );
#else
// Compiled-out alternative: 100000 for both s and d in all three entries.
bli_blksz_init_easy( &thresh[ BLIS_MT ], 100000, 100000, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_NT ], 100000, 100000, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_KT ], 100000, 100000, -1, -1 );
#endif
// Initialize the context with the sup thresholds.
bli_cntx_set_l3_sup_thresh
(
// Equals the number of (threshold id, threshold object) pairs listed below;
// presumably the variadic pair count -- keep the two in sync.
3,
BLIS_MT, &thresh[ BLIS_MT ],
BLIS_NT, &thresh[ BLIS_NT ],
BLIS_KT, &thresh[ BLIS_KT ],
cntx
);
// NOTE: this block is compiled out (#if 0), so bli_gemmsup_ref is not
// registered as the gemm sup handler by this code.
#if 0
// Initialize the context with the sup handlers.
bli_cntx_set_l3_sup_handlers
(
// One (operation id, handler function) pair follows.
1,
BLIS_GEMM, bli_gemmsup_ref,
cntx
);
#endif
// Update the context with optimized small/unpacked gemm kernels.
// Each entry below is a four-item tuple: a three-letter storage-combination
// id (BLIS_RRR ... BLIS_CCC), a datatype, a kernel function, and a boolean.
// All eight combinations are registered for BLIS_DOUBLE (6x8 kernels) and
// again for BLIS_FLOAT (6x16 kernels). In both groups the "rd" kernels are
// used only for the RRC and CRC combinations; every other combination uses
// an "rv" kernel.
// NOTE(review): the trailing TRUE is presumably the kernel's row-storage
// preference flag -- confirm against bli_cntx_set_l3_sup_kers().
bli_cntx_set_l3_sup_kers
(
// Equals the number of active tuples below (8 double + 8 float); presumably
// the variadic entry count -- keep the two in sync.
16,
// Disabled alternative registration for RCR; not one of the 16 entries.
//BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_r_haswell_ref,
BLIS_RRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE,
BLIS_RRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, TRUE,
BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE,
BLIS_RCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE,
BLIS_CRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE,
BLIS_CRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, TRUE,
BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE,
BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE,
BLIS_RRR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, TRUE,
BLIS_RRC, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16m, TRUE,
BLIS_RCR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, TRUE,
BLIS_RCC, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, TRUE,
BLIS_CRR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, TRUE,
BLIS_CRC, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16n, TRUE,
BLIS_CCR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, TRUE,
BLIS_CCC, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, TRUE,
cntx
);
// Initialize level-3 sup blocksize objects with architecture-specific
// values. Columns follow the s/d/c/z header; only the s and d columns carry
// values here (c and z are -1).
// s d c z
// MR is the only entry initialized with bli_blksz_init(), which takes a
// second row of four values (9, 9, -1, -1).
// NOTE(review): that second row is presumably the maximum blocksize per
// datatype -- confirm against bli_blksz_init().
bli_blksz_init ( &blkszs[ BLIS_MR ], 6, 6, -1, -1,
9, 9, -1, -1 );
// MR = 6 and NR = 16 (s) / 8 (d) appear to correspond to the 6x16 and 6x8
// dimensions in the haswell sup kernel names used in this file.
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 168, 72, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, -1, -1 );
// Update the context with the current architecture's register and cache
// blocksizes for small/unpacked level-3 problems.
bli_cntx_set_l3_sup_blkszs
(
// Equals the number of (blocksize id, blocksize object) pairs listed below;
// presumably the variadic pair count -- keep the two in sync.
5,
BLIS_NC, &blkszs[ BLIS_NC ],
BLIS_KC, &blkszs[ BLIS_KC ],
BLIS_MC, &blkszs[ BLIS_MC ],
BLIS_NR, &blkszs[ BLIS_NR ],
BLIS_MR, &blkszs[ BLIS_MR ],
cntx
);
}

View File

@@ -4,31 +4,48 @@
exec_root="test"
out_root="output"
sys="blis"
#sys="blis"
#sys="lonestar5"
#sys="ul252"
#sys="ul264"
sys="ul2128"
# Select per-system settings based on ${sys}. Each branch sets:
#   GOMP_CPU_AFFINITY - the CPU list exported for GNU OpenMP thread binding,
#   numactl           - a command prefix placed in front of the test
#                       executable (empty where numactl is not used),
#   nt                - presumably the thread count for multithreaded runs;
#                       verify against its use later in the script.
# The ul2* branches also append an MKL library directory to LD_LIBRARY_PATH.
#
# ${sys} is quoted so that an empty or unset value simply matches no branch
# instead of making '[' fail with a "unary operator expected" error on every
# test. If no branch matches, numactl and nt are left untouched by this block.
if [ "${sys}" = "blis" ]; then
    export GOMP_CPU_AFFINITY="0-3"
    numactl=""
    nt=4
elif [ "${sys}" = "lonestar5" ]; then
    export GOMP_CPU_AFFINITY="0-23"
    numactl=""
    nt=12
elif [ "${sys}" = "ul252" ]; then
    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
    export GOMP_CPU_AFFINITY="0-51"
    numactl="numactl --interleave=all"
    nt=26
elif [ "${sys}" = "ul264" ]; then
    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
    export GOMP_CPU_AFFINITY="0-63"
    numactl="numactl --interleave=all"
    nt=32
elif [ "${sys}" = "ul2128" ]; then
    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
    export GOMP_CPU_AFFINITY="0-127"
    numactl="numactl --interleave=all"
    nt=32
fi
@@ -94,7 +111,7 @@ pcombos="uu"
impls="vendor blissup blisconv openblas eigen blasfeo libxsmm"
#impls="vendor blissup blisconv openblas eigen"
#impls="vendor"
impls="blissup"
#impls="blissup"
#impls="blisconv"
#impls="openblas"
#impls="eigen"
@@ -310,10 +327,10 @@ for th in ${threads}; do
# Construct the name of the output file.
out_file="${out_root}_${th}_${dt}${op}_${tr}_${st}_${shstr}_${ldstr}_${packstr}_${im}.m"
echo "Running (nt = ${nt_use}) ./${exec_name} > ${out_file}"
echo "Running (nt = ${nt_use}) ${numactl} ./${exec_name} > ${out_file}"
# Run executable.
./${exec_name} > ${out_file}
${numactl} ./${exec_name} > ${out_file}
sleep ${delay}