mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Add full QA with verification option, few other changes. (#331)
* add verify flag and update scripts
* replace old check_error function with the new check_err
* fix syntax
* remove blank spaces
* remove empty line
* add check_err for tensors
* fix syntax
* replace tensors with vectors in check_err calls
* fix syntax
* remove blank spaces
* fix syntax
* add new line at end of file
* disable conv2d_bwd_weight test, add gpu check
* set check_gpu using export
* check GPU using runShell
* add definition of runShell
* fix script syntax
* reduce the number of threads, add full qa option
* run processing scripts in bash
* fix the branch and host names in performance scripts, add chronos
* replace parameterizedCron with cron
* archive the perf log files
* try to fix git call
* pass branch and host names as arguments into scripts
* fix script arguments
* fix script arguments
* process results on master
* fix pipeline
* add definition of gpu_arch
* run processing scripts in docker
* fix the brackets
* add agent master for the processing stage
* get rid of show_node_info call on master
* try using mici label instead of master, disable MI100 tests for now
* fix syntax
* simplify container for results processing
* remove node(master) from the process_results stage
* put all stages in original order
* change the agent label from master to mici for gfx908
[ROCm/composable_kernel commit: d8415a96b3]
This commit is contained in:
0
script/clang-format-overwrite.sh
Normal file → Executable file
0
script/clang-format-overwrite.sh
Normal file → Executable file
@@ -85,7 +85,6 @@ def parse_logfile(logfile):
|
||||
for line in open(logfile):
|
||||
if 'Best Perf' in line:
|
||||
lst=line.split()
|
||||
print("len(lst)=",len(lst),"lst:",lst)
|
||||
if len(lst)>=37: #the line is complete
|
||||
tests.append(glue.join(lst[5:30]))
|
||||
kernels.append(glue.join(lst[37:]))
|
||||
@@ -293,4 +292,4 @@ def main():
|
||||
return regression
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
main()
|
||||
|
||||
16
script/process_perf_data.sh
Executable file
16
script/process_perf_data.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# in order to run this script you'd need the following python packages:
|
||||
|
||||
pip3 install --upgrade pip
|
||||
pip3 install sqlalchemy pymysql pandas sshtunnel
|
||||
|
||||
# you would also need to set up some environment variables in order to
|
||||
# post your new test results to the database and compare them to the baseline
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
|
||||
#process results
|
||||
gpu_arch=$1
|
||||
python3 process_perf_data.py perf_gemm_"$gpu_arch".log
|
||||
# resnet50 batch-256 results; the name must match the perf_resnet50_N256_<gpu_arch>.log
# file written by the run_*performance_tests.sh scripts (was misspelled "N265").
python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log
|
||||
22
script/process_qa_data.sh
Executable file
22
script/process_qa_data.sh
Executable file
@@ -0,0 +1,22 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# in order to run this script you'd need the following python packages:
|
||||
|
||||
pip3 install --upgrade pip
|
||||
pip3 install sqlalchemy pymysql pandas sshtunnel
|
||||
|
||||
# you would also need to set up some environment variables in order to
|
||||
# post your new test results to the database and compare them to the baseline
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
|
||||
#process results
|
||||
gpu_arch=$1
|
||||
python3 process_perf_data.py perf_gemm_"$gpu_arch".log
|
||||
# resnet50 batch-256 results; the name must match the perf_resnet50_N256_<gpu_arch>.log
# file written by run_full_performance_tests.sh (was misspelled "N265").
python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_batched_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_grouped_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_fwd_conv_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_bwd_conv_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_fusion_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_reduction_"$gpu_arch".log
|
||||
@@ -11,26 +11,34 @@ INIT=$5
|
||||
LOG=$6
|
||||
REPEAT=$7
|
||||
|
||||
######## op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 -1 -1 -1 2
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
REPEAT=$7
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4096 4096 4096 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8192 8192 8192 2
|
||||
######## op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 -1 -1 -1 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 -1 -1 -1 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 -1 -1 -1 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 -1 -1 -1 -1 -1 -1 2
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1056 1056 1056 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2080 2080 2080 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4128 4128 4128 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8224 8224 8224 2
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4096 4096 4096 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8192 8192 8192 -1 -1 -1 2
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1088 1088 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256 2
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1056 1056 1056 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2080 2080 2080 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4128 4128 4128 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8224 8224 8224 -1 -1 -1 2
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1088 1088 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256 -1 -1 -1 2
|
||||
41
script/profile_gemm_bilinear.sh
Executable file
41
script/profile_gemm_bilinear.sh
Executable file
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideD StrideE Alpha Beta
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 1024 1024 -1 -1 -1 -1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1920 2048 2048 -1 -1 -1 -1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3840 4096 4096 -1 -1 -1 -1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7680 8192 8192 -1 -1 -1 -1 1 1
|
||||
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideD StrideE Alpha Beta
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 1024 1024 -1 -1 0 -1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1920 2048 2048 -1 -1 0 -1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3840 4096 4096 -1 -1 0 -1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7680 8192 8192 -1 -1 0 -1 1 1
|
||||
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideD StrideE Alpha Beta
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1000 1000 1000 -1 -1 0 -1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2000 2000 2000 -1 -1 0 -1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4000 4000 4000 -1 -1 0 -1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8000 8000 8000 -1 -1 0 -1 1 1
|
||||
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideD StrideE Alpha Beta
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1056 1056 1056 1056 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2080 2080 2080 2080 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4128 4128 4128 4128 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8224 8224 8224 8224 1 1
|
||||
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideD StrideE Alpha Beta
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1088 1088 1088 1088 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2112 2112 2112 2112 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4160 4160 4160 4160 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8256 8256 8256 8256 1 1
|
||||
@@ -1,124 +1,124 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
|
||||
# and make sure the following python packages are installed in your environment:
|
||||
|
||||
pip3 install --upgrade pip
|
||||
pip3 install sqlalchemy pymysql pandas sshtunnel
|
||||
|
||||
# you would also need to set up some environment variables in order to
|
||||
# post your new test results to the database and compare them to the baseline
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
#
|
||||
# run the script as "./run_full_performance_tests.sh <tag for your test environment>
|
||||
|
||||
#get the test environment type:
|
||||
export env_type=$1
|
||||
echo 'Environment type ' $env_type
|
||||
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <gpu_arch> <branch name> <node name>"
|
||||
# input arguments:
|
||||
# verification = 0 : do not verify result correctness on CPU
|
||||
# = 1 : verify correctness on CPU (may take a long time)
|
||||
# environment tag : a string describing the specifics of your test environment
|
||||
# gpu_arch : a string for GPU architecture, e.g. "gfx908" or "gfx90a".
|
||||
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
|
||||
# node name : $hostname
|
||||
|
||||
#get the command line arguments:
|
||||
export verify=$1
|
||||
echo 'Verification: ' $verify
|
||||
export env_type=$2
|
||||
echo 'Environment type: ' $env_type
|
||||
export gpu_arch=$3
|
||||
echo 'GPU architecture: ' $gpu_arch
|
||||
export branch=$4
|
||||
echo 'Branch name: ' $branch
|
||||
export host_name=$5
|
||||
echo 'Host name: ' $host_name
|
||||
#######################################
# Start a fresh performance log and write its header lines.
# Arguments:
#   $1 - path of the log file (any existing file is removed first)
#   $2 - environment tag describing the test environment
#   $3 - git branch name
#   $4 - host (node) name
# Outputs:
#   Writes "On branch", "Node name:", "GPU_arch:", "Compute Unit:",
#   "HIP version", "Environment type:" and compiler "InstalledDir"
#   lines to the log file, in that order.
#######################################
print_log_header() {
  local log_file=$1
  local env_tag=$2
  local branch_name=$3
  local node_name=$4

  rm -f -- "$log_file"
  # Branch and node names are taken from the caller's arguments. The earlier
  # `git status` / `hostname` writes were dropped: the truncating redirect on
  # the "On branch" line discarded their output anyway.
  echo 'On branch ' "$branch_name" > "$log_file"
  echo 'Node name: ' "$node_name" >> "$log_file"
  # get GPU_arch and number of compute units from rocminfo
  echo -n "GPU_arch: " >> "$log_file"
  rocminfo | grep "Name:" | grep "gfx" >> "$log_file"
  rocminfo | grep "Compute Unit:" >> "$log_file"
  hipcc --version | grep -e 'HIP version' >> "$log_file"
  # Written exactly once (the header previously carried this line twice).
  echo 'Environment type: ' "$env_tag" >> "$log_file"
  /opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> "$log_file"
}
|
||||
|
||||
#run gemm tests
|
||||
export gemm_log="perf_gemm.log"
|
||||
print_log_header $gemm_log $env_type
|
||||
./profile_gemm.sh gemm 0 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 3 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 3 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 3 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 0 1 0 5 | tee -a $gemm_log
|
||||
python3 process_perf_data.py $gemm_log
|
||||
export gemm_log="perf_gemm_${gpu_arch}.log"
|
||||
print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 0 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256.log"
|
||||
print_log_header $resnet256_log $env_type
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 256 | tee -a $resnet256_log
|
||||
python3 process_perf_data.py $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4.log"
|
||||
print_log_header $resnet4_log $env_type
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 4 | tee -a $resnet4_log
|
||||
python3 process_perf_data.py $resnet4_log
|
||||
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
|
||||
print_log_header $resnet256_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 2 0 1 256 | tee -a $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
|
||||
print_log_header $resnet4_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 2 0 1 4 | tee -a $resnet4_log
|
||||
|
||||
#run batched_gemm tests
|
||||
export batched_gemm_log="perf_batched_gemm.log"
|
||||
print_log_header $batched_gemm_log $env_type
|
||||
./profile_batched_gemm.sh batched_gemm 0 0 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 1 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 2 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 3 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 0 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 1 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 2 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 3 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 0 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 1 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 2 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 3 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 0 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 1 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 2 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 3 0 2 0 5 | tee -a $batched_gemm_log
|
||||
python3 process_perf_data.py $batched_gemm_log
|
||||
export batched_gemm_log="perf_batched_gemm_${gpu_arch}.log"
|
||||
print_log_header $batched_gemm_log $env_type $branch $host_name
|
||||
./profile_batched_gemm.sh batched_gemm 0 0 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 1 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 2 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 3 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 0 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 1 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 2 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 3 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 0 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 1 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 2 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 3 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 0 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 1 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 2 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 3 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
|
||||
#run grouped_gemm tests
|
||||
export grouped_gemm_log="perf_grouped_gemm.log"
|
||||
print_log_header $grouped_gemm_log $env_type
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 0 0 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 1 0 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 2 0 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 3 0 2 0 5 | tee -a $grouped_gemm_log
|
||||
python3 process_perf_data.py $grouped_gemm_log
|
||||
export grouped_gemm_log="perf_grouped_gemm_${gpu_arch}.log"
|
||||
print_log_header $grouped_gemm_log $env_type $branch $host_name
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 2 $verify 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 2 0 5 | tee -a $grouped_gemm_log
|
||||
|
||||
#run fwd_conv tests
|
||||
export fwd_conv_log="perf_fwd_conv.log"
|
||||
print_log_header $fwd_conv_log $env_type
|
||||
./profile_conv.sh conv_fwd 0 1 0 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 1 1 0 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 2 1 0 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 3 1 0 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
python3 process_perf_data.py $fwd_conv_log
|
||||
export fwd_conv_log="perf_fwd_conv_${gpu_arch}.log"
|
||||
print_log_header $fwd_conv_log $env_type $branch $host_name
|
||||
./profile_conv.sh conv_fwd 0 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 1 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 2 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 3 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
|
||||
#run bwd_conv tests
|
||||
export bwd_conv_log="perf_bwd_conv.log"
|
||||
print_log_header $bwd_conv_log $env_type
|
||||
./profile_conv.sh conv2d_bwd_data 0 1 1 1 0 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 1 1 1 1 0 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 2 1 1 1 0 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 3 1 1 1 0 2 0 5 128 | tee -a $bwd_conv_log
|
||||
python3 process_perf_data.py $bwd_conv_log
|
||||
export bwd_conv_log="perf_bwd_conv_${gpu_arch}.log"
|
||||
print_log_header $bwd_conv_log $env_type $branch $host_name
|
||||
./profile_conv.sh conv2d_bwd_data 0 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 1 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 2 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 3 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log
|
||||
|
||||
#run fusion tests
|
||||
export fusion_log="perf_fusion.log"
|
||||
print_log_header $fusion_log $env_type
|
||||
./profile_gemm_bias_relu_add.sh gemm_bias_relu_add 1 0 0 2 0 5 | tee -a $fusion_log
|
||||
./profile_gemm_bias_relu_add.sh gemm_bias_relu_add 1 1 0 2 0 5 | tee -a $fusion_log
|
||||
./profile_gemm_bias_relu_add.sh gemm_bias_relu_add 1 2 0 2 0 5 | tee -a $fusion_log
|
||||
./profile_gemm_bias_relu_add.sh gemm_bias_relu_add 1 3 0 2 0 5 | tee -a $fusion_log
|
||||
python3 process_perf_data.py $fusion_log
|
||||
export fusion_log="perf_fusion_${gpu_arch}.log"
|
||||
print_log_header $fusion_log $env_type $branch $host_name
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 2 0 1 | tee -a $fusion_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 2 0 1 | tee -a $fusion_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 2 $verify 2 0 1 | tee -a $fusion_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 2 0 1 | tee -a $fusion_log
|
||||
|
||||
#run reduction tests
|
||||
export reduction_log="perf_reduction.log"
|
||||
print_log_header $reduction_log $env_type
|
||||
./profile_reduce_with_index.sh 0 2 10 --half | tee -a $reduction_log
|
||||
./profile_reduce_no_index.sh 0 2 10 --half | tee -a $reduction_log
|
||||
python3 process_perf_data.py $reduction_log
|
||||
export reduction_log="perf_reduction_${gpu_arch}.log"
|
||||
print_log_header $reduction_log $env_type $branch $host_name
|
||||
./profile_reduce_with_index.sh $verify 2 10 --half | tee -a $reduction_log
|
||||
./profile_reduce_no_index.sh $verify 2 10 --half | tee -a $reduction_log
|
||||
|
||||
@@ -1,59 +1,62 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
|
||||
# and make sure the following python packages are installed in your environment:
|
||||
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <gpu_arch> <branch name> <node name>"
|
||||
# input arguments:
|
||||
# verification = 0 : do not verify result correctness on CPU
|
||||
# = 1 : verify correctness on CPU (may take a long time)
|
||||
# environment tag : a string describing the specifics of your test environment
|
||||
# gpu_arch : a string for GPU architecture, e.g. "gfx908" or "gfx90a".
|
||||
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
|
||||
# node name : $hostname
|
||||
|
||||
pip3 install --upgrade pip
|
||||
pip3 install sqlalchemy pymysql pandas sshtunnel
|
||||
|
||||
# you would also need to set up some environment variables in order to
|
||||
# post your new test results to the database and compare them to the baseline
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
#
|
||||
# run the script as "./run_performance_tests.sh <tag for your test environment>
|
||||
|
||||
#get the test environment type:
|
||||
export env_type=$1
|
||||
echo 'Environment type ' $env_type
|
||||
#get the command line arguments:
|
||||
export verify=$1
|
||||
echo 'Verification: ' $verify
|
||||
export env_type=$2
|
||||
echo 'Environment type: ' $env_type
|
||||
export gpu_arch=$3
|
||||
echo 'GPU architecture: ' $gpu_arch
|
||||
export branch=$4
|
||||
echo 'Branch name: ' $branch
|
||||
export host_name=$5
|
||||
echo 'Host name: ' $host_name
|
||||
|
||||
#######################################
# Start a fresh performance log and write its header lines.
# Arguments:
#   $1 - path of the log file (any existing file is removed first)
#   $2 - environment tag describing the test environment
#   $3 - git branch name
#   $4 - host (node) name
# Outputs:
#   Writes "On branch", "Node name:", "GPU_arch:", "Compute Unit:",
#   "HIP version", "Environment type:" and compiler "InstalledDir"
#   lines to the log file, in that order.
#######################################
print_log_header() {
  local log_file=$1
  local env_tag=$2
  local branch_name=$3
  local node_name=$4

  rm -f -- "$log_file"
  # Branch and node names are taken from the caller's arguments. The earlier
  # `git status` / `hostname` writes were dropped: the truncating redirect on
  # the "On branch" line discarded their output anyway.
  echo 'On branch ' "$branch_name" > "$log_file"
  echo 'Node name: ' "$node_name" >> "$log_file"
  # get GPU_arch and number of compute units from rocminfo
  echo -n "GPU_arch: " >> "$log_file"
  rocminfo | grep "Name:" | grep "gfx" >> "$log_file"
  rocminfo | grep "Compute Unit:" >> "$log_file"
  hipcc --version | grep -e 'HIP version' >> "$log_file"
  # Written exactly once (the header previously carried this line twice).
  echo 'Environment type: ' "$env_tag" >> "$log_file"
  /opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> "$log_file"
}
|
||||
#run gemm tests
|
||||
export gemm_log="perf_gemm.log"
|
||||
print_log_header $gemm_log $env_type
|
||||
./profile_gemm.sh gemm 0 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 3 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 3 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 3 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 0 1 0 5 | tee -a $gemm_log
|
||||
python3 process_perf_data.py $gemm_log
|
||||
export gemm_log="perf_gemm_${gpu_arch}.log"
|
||||
print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 0 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
|
||||
#run resnet50 test
|
||||
export resnet256_log="perf_resnet50_N256.log"
|
||||
print_log_header $resnet256_log $env_type
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 256 | tee -a $resnet256_log
|
||||
python3 process_perf_data.py $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4.log"
|
||||
print_log_header $resnet4_log $env_type
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 4 | tee -a $resnet4_log
|
||||
python3 process_perf_data.py $resnet4_log
|
||||
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
|
||||
print_log_header $resnet256_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 2 0 1 256 | tee -a $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
|
||||
print_log_header $resnet4_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 2 0 1 4 | tee -a $resnet4_log
|
||||
|
||||
Reference in New Issue
Block a user