mirror of
https://github.com/amd/blis.git
synced 2026-03-22 02:07:21 +00:00
Register l3 sup kernels in zen2 subconfig.
Details:
- Registered the full suite of sgemm and dgemm sup millikernels, blocksizes, and crossover thresholds in bli_cntx_init_zen2.c.
- Minor updates to test/sup/runme.sh for running on a Zen2 Epyc 7742 system.
This commit is contained in:
@@ -38,6 +38,7 @@
|
||||
void bli_cntx_init_zen2( cntx_t* cntx )
|
||||
{
|
||||
blksz_t blkszs[ BLIS_NUM_BLKSZS ];
|
||||
blksz_t thresh[ BLIS_NUM_THRESH ];
|
||||
|
||||
// Set default kernel blocksizes and functions.
|
||||
bli_cntx_init_zen2_ref( cntx );
|
||||
@@ -137,5 +138,87 @@ void bli_cntx_init_zen2( cntx_t* cntx )
|
||||
BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF,
|
||||
cntx
|
||||
);
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// Initialize sup thresholds with architecture-appropriate values.
|
||||
// s d c z
|
||||
#if 1
|
||||
bli_blksz_init_easy( &thresh[ BLIS_MT ], 500, 249, -1, -1 );
|
||||
bli_blksz_init_easy( &thresh[ BLIS_NT ], 500, 249, -1, -1 );
|
||||
bli_blksz_init_easy( &thresh[ BLIS_KT ], 500, 249, -1, -1 );
|
||||
#else
|
||||
bli_blksz_init_easy( &thresh[ BLIS_MT ], 100000, 100000, -1, -1 );
|
||||
bli_blksz_init_easy( &thresh[ BLIS_NT ], 100000, 100000, -1, -1 );
|
||||
bli_blksz_init_easy( &thresh[ BLIS_KT ], 100000, 100000, -1, -1 );
|
||||
#endif
|
||||
|
||||
// Initialize the context with the sup thresholds.
|
||||
bli_cntx_set_l3_sup_thresh
|
||||
(
|
||||
3,
|
||||
BLIS_MT, &thresh[ BLIS_MT ],
|
||||
BLIS_NT, &thresh[ BLIS_NT ],
|
||||
BLIS_KT, &thresh[ BLIS_KT ],
|
||||
cntx
|
||||
);
|
||||
|
||||
#if 0
|
||||
// Initialize the context with the sup handlers.
|
||||
bli_cntx_set_l3_sup_handlers
|
||||
(
|
||||
1,
|
||||
BLIS_GEMM, bli_gemmsup_ref,
|
||||
cntx
|
||||
);
|
||||
#endif
|
||||
|
||||
// Update the context with optimized small/unpacked gemm kernels.
|
||||
bli_cntx_set_l3_sup_kers
|
||||
(
|
||||
16,
|
||||
//BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_r_haswell_ref,
|
||||
BLIS_RRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE,
|
||||
BLIS_RRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, TRUE,
|
||||
BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE,
|
||||
BLIS_RCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE,
|
||||
BLIS_CRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE,
|
||||
BLIS_CRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, TRUE,
|
||||
BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE,
|
||||
BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE,
|
||||
|
||||
BLIS_RRR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, TRUE,
|
||||
BLIS_RRC, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16m, TRUE,
|
||||
BLIS_RCR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, TRUE,
|
||||
BLIS_RCC, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, TRUE,
|
||||
BLIS_CRR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, TRUE,
|
||||
BLIS_CRC, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16n, TRUE,
|
||||
BLIS_CCR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, TRUE,
|
||||
BLIS_CCC, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, TRUE,
|
||||
cntx
|
||||
);
|
||||
|
||||
// Initialize level-3 sup blocksize objects with architecture-specific
|
||||
// values.
|
||||
// s d c z
|
||||
bli_blksz_init ( &blkszs[ BLIS_MR ], 6, 6, -1, -1,
|
||||
9, 9, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 168, 72, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, -1, -1 );
|
||||
|
||||
// Update the context with the current architecture's register and cache
|
||||
// blocksizes for small/unpacked level-3 problems.
|
||||
bli_cntx_set_l3_sup_blkszs
|
||||
(
|
||||
5,
|
||||
BLIS_NC, &blkszs[ BLIS_NC ],
|
||||
BLIS_KC, &blkszs[ BLIS_KC ],
|
||||
BLIS_MC, &blkszs[ BLIS_MC ],
|
||||
BLIS_NR, &blkszs[ BLIS_NR ],
|
||||
BLIS_MR, &blkszs[ BLIS_MR ],
|
||||
cntx
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -4,31 +4,48 @@
|
||||
exec_root="test"
|
||||
out_root="output"
|
||||
|
||||
sys="blis"
|
||||
#sys="blis"
|
||||
#sys="lonestar5"
|
||||
#sys="ul252"
|
||||
#sys="ul264"
|
||||
sys="ul2128"
|
||||
|
||||
if [ ${sys} = "blis" ]; then
|
||||
|
||||
export GOMP_CPU_AFFINITY="0-3"
|
||||
|
||||
numactl=""
|
||||
nt=4
|
||||
|
||||
elif [ ${sys} = "lonestar5" ]; then
|
||||
|
||||
export GOMP_CPU_AFFINITY="0-23"
|
||||
|
||||
numactl=""
|
||||
nt=12
|
||||
|
||||
elif [ ${sys} = "ul252" ]; then
|
||||
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
|
||||
export GOMP_CPU_AFFINITY="0-51"
|
||||
|
||||
numactl="numactl --interleave=all"
|
||||
nt=26
|
||||
|
||||
elif [ ${sys} = "ul264" ]; then
|
||||
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
|
||||
export GOMP_CPU_AFFINITY="0-63"
|
||||
|
||||
numactl="numactl --interleave=all"
|
||||
nt=32
|
||||
|
||||
elif [ ${sys} = "ul2128" ]; then
|
||||
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
|
||||
export GOMP_CPU_AFFINITY="0-127"
|
||||
|
||||
numactl="numactl --interleave=all"
|
||||
nt=32
|
||||
|
||||
fi
|
||||
@@ -94,7 +111,7 @@ pcombos="uu"
|
||||
impls="vendor blissup blisconv openblas eigen blasfeo libxsmm"
|
||||
#impls="vendor blissup blisconv openblas eigen"
|
||||
#impls="vendor"
|
||||
impls="blissup"
|
||||
#impls="blissup"
|
||||
#impls="blisconv"
|
||||
#impls="openblas"
|
||||
#impls="eigen"
|
||||
@@ -310,10 +327,10 @@ for th in ${threads}; do
|
||||
# Construct the name of the output file.
|
||||
out_file="${out_root}_${th}_${dt}${op}_${tr}_${st}_${shstr}_${ldstr}_${packstr}_${im}.m"
|
||||
|
||||
echo "Running (nt = ${nt_use}) ./${exec_name} > ${out_file}"
|
||||
echo "Running (nt = ${nt_use}) ${numactl} ./${exec_name} > ${out_file}"
|
||||
|
||||
# Run executable.
|
||||
./${exec_name} > ${out_file}
|
||||
${numactl} ./${exec_name} > ${out_file}
|
||||
|
||||
sleep ${delay}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user