mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Build the CK targets only once. (#433)
* build CK only once, use deb package in all subsequent stages
* update jenkins file
* change prefix for build_CK stage
* update writing deb metadata to control file
* update ubuntu source for docker, script syntax for deb package metadata
* try different way to create deb metadata
* clean up DEBIAN before creating one
* fix the CI folder names, fix splitK qa
* use correct docker in all stages, separate tests for splitK verification and performance
* clean old comments, change dir before packaging
* use different package syntax
* change packaging syntax
* package with cmake
* remove unnecessary build prefix
* get rid of unnecessary paths
* change paths during unpacking
* change script syntax while unpacking
* get rid of unnecessary steps
* get rid of comments in the scripts
* use double quotes for scripts
* add ccache during build, try dpkg -x
* pull and install each package separately
* use full package names
* try to use stashing for packages
* change stash/unstash syntax
* move unstash out of shell, run tests on any gpu node
* unpack each package separately
* try re-using existing workspace
* merge the build and test stages, only stash ckProfiler
* merge the build and test stages, only stash zipped ckProfiler
* fix syntax
* add GPU check before build and test, rename docker to usual name
[ROCm/composable_kernel commit: 85b0920dc8]
This commit is contained in:
@@ -2,15 +2,14 @@
|
||||
#
|
||||
# in order to run this script you'd need the following python packages:
|
||||
|
||||
pip3 install --upgrade pip
|
||||
pip3 install sqlalchemy pymysql pandas sshtunnel
|
||||
#pip3 install --upgrade pip
|
||||
#pip3 install sqlalchemy pymysql pandas sshtunnel
|
||||
|
||||
# you would also need to set up some environment variables in order to
|
||||
# post your new test results to the database and compare them to the baseline
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
|
||||
#process results
|
||||
gpu_arch=$1
|
||||
python3 process_perf_data.py perf_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_gemm.log
|
||||
python3 process_perf_data.py perf_resnet50_N256.log
|
||||
python3 process_perf_data.py perf_resnet50_N4.log
|
||||
|
||||
@@ -10,15 +10,14 @@
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
|
||||
#process results
|
||||
gpu_arch=$1
|
||||
python3 process_perf_data.py perf_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_batched_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_grouped_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_conv_fwd_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_conv_bwd_data_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_gemm_bilinear_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_reduction_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_splitK_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_onnx_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_gemm.log
|
||||
python3 process_perf_data.py perf_resnet50_N256.log
|
||||
python3 process_perf_data.py perf_resnet50_N4.log
|
||||
python3 process_perf_data.py perf_batched_gemm.log
|
||||
python3 process_perf_data.py perf_grouped_gemm.log
|
||||
python3 process_perf_data.py perf_conv_fwd.log
|
||||
python3 process_perf_data.py perf_conv_bwd_data.log
|
||||
python3 process_perf_data.py perf_gemm_bilinear.log
|
||||
python3 process_perf_data.py perf_reduction.log
|
||||
python3 process_perf_data.py perf_splitK_gemm.log
|
||||
python3 process_perf_data.py perf_onnx_gemm.log
|
||||
|
||||
@@ -5,12 +5,11 @@
|
||||
# post your new test results to the database and compare them to the baseline
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
#
|
||||
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <gpu_arch> <branch name> < node name>
|
||||
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>"
|
||||
# input arguments:
|
||||
# verification = 0 : do not verify result correctness on CPU
|
||||
# = 1 : verify correctness on CPU (may take a long time)
|
||||
# environment tag : a string describing the specifics of your test environment
|
||||
# gpu_arch : a string for GPU architecture, e.g. "gfx908" or "gfx90a".
|
||||
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
|
||||
# node name : $hostname
|
||||
|
||||
@@ -19,11 +18,9 @@ export verify=$1
|
||||
echo 'Verification: ' $verify
|
||||
export env_type=$2
|
||||
echo 'Environment type: ' $env_type
|
||||
export gpu_arch=$3
|
||||
echo 'GPU architecture: ' $gpu_arch
|
||||
export branch=$4
|
||||
export branch=$3
|
||||
echo 'Branch name: ' $branch
|
||||
export host_name=$5
|
||||
export host_name=$4
|
||||
echo 'Host name: ' $host_name
|
||||
function print_log_header(){
|
||||
rm -f $1;
|
||||
@@ -38,7 +35,7 @@ function print_log_header(){
|
||||
}
|
||||
|
||||
#run gemm tests
|
||||
export gemm_log="perf_gemm_${gpu_arch}.log"
|
||||
export gemm_log="perf_gemm.log"
|
||||
print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
|
||||
@@ -58,7 +55,7 @@ print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 1 2>&1 | tee -a $gemm_log
|
||||
|
||||
#run batched_gemm tests
|
||||
export batched_gemm_log="perf_batched_gemm_${gpu_arch}.log"
|
||||
export batched_gemm_log="perf_batched_gemm.log"
|
||||
print_log_header $batched_gemm_log $env_type $branch $host_name
|
||||
./profile_batched_gemm.sh batched_gemm 0 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
|
||||
@@ -78,7 +75,7 @@ print_log_header $batched_gemm_log $env_type $branch $host_name
|
||||
./profile_batched_gemm.sh batched_gemm 3 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
|
||||
|
||||
#run grouped_gemm tests
|
||||
export grouped_gemm_log="perf_grouped_gemm_${gpu_arch}.log"
|
||||
export grouped_gemm_log="perf_grouped_gemm.log"
|
||||
print_log_header $grouped_gemm_log $env_type $branch $host_name
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
|
||||
@@ -86,7 +83,7 @@ print_log_header $grouped_gemm_log $env_type $branch $host_name
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
|
||||
|
||||
#run GEMM+Bilinear tests
|
||||
export gemm_bilinear_log="perf_gemm_bilinear_${gpu_arch}.log"
|
||||
export gemm_bilinear_log="perf_gemm_bilinear.log"
|
||||
print_log_header $gemm_bilinear_log $env_type $branch $host_name
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
|
||||
@@ -94,7 +91,7 @@ print_log_header $gemm_bilinear_log $env_type $branch $host_name
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
|
||||
|
||||
#run conv_fwd tests
|
||||
export conv_fwd_log="perf_conv_fwd_${gpu_arch}.log"
|
||||
export conv_fwd_log="perf_conv_fwd.log"
|
||||
print_log_header $conv_fwd_log $env_type $branch $host_name
|
||||
./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
@@ -102,7 +99,7 @@ print_log_header $conv_fwd_log $env_type $branch $host_name
|
||||
./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
|
||||
#run conv_bwd_data tests
|
||||
export conv_bwd_data_log="perf_conv_bwd_data_${gpu_arch}.log"
|
||||
export conv_bwd_data_log="perf_conv_bwd_data.log"
|
||||
print_log_header $conv_bwd_data_log $env_type $branch $host_name
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
@@ -110,33 +107,43 @@ print_log_header $conv_bwd_data_log $env_type $branch $host_name
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
|
||||
export resnet256_log="perf_resnet50_N256.log"
|
||||
print_log_header $resnet256_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 2>&1 | tee -a $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
|
||||
export resnet4_log="perf_resnet50_N4.log"
|
||||
print_log_header $resnet4_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 2>&1 | tee -a $resnet4_log
|
||||
|
||||
#run reduction tests
|
||||
export reduction_log="perf_reduction_${gpu_arch}.log"
|
||||
export reduction_log="perf_reduction.log"
|
||||
print_log_header $reduction_log $env_type $branch $host_name
|
||||
./profile_reduce_with_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log
|
||||
./profile_reduce_no_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log
|
||||
|
||||
#run splitK_gemm tests
|
||||
export splitK_gemm_log="perf_splitK_gemm_${gpu_arch}.log"
|
||||
#run splitK_gemm tests, first correctness verification, then performance
|
||||
export splitK_gemm_ver_log="perf_splitK_gemm_verify.log"
|
||||
print_log_header $splitK_gemm_ver_log $env_type $branch $host_name
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
export splitK_gemm_log="perf_splitK_gemm.log"
|
||||
print_log_header $splitK_gemm_log $env_type $branch $host_name
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
|
||||
#run ONNX gemm tests
|
||||
export onnx_log="perf_onnx_gemm_${gpu_arch}.log"
|
||||
export onnx_log="perf_onnx_gemm.log"
|
||||
print_log_header $onnx_log $env_type $branch $host_name
|
||||
./profile_onnx_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
|
||||
./profile_onnx_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
|
||||
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <gpu_arch> <branch name> < node name>
|
||||
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>"
|
||||
# input arguments:
|
||||
# verification = 0 : do not verify result correctness on CPU
|
||||
# = 1 : verify correctness on CPU (may take a long time)
|
||||
# environment tag : a string describing the specifics of your test environment
|
||||
# gpu_arch : a string for GPU architecture, e.g. "gfx908" or "gfx90a".
|
||||
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
|
||||
# node name : $hostname
|
||||
|
||||
@@ -15,11 +14,9 @@ export verify=$1
|
||||
echo 'Verification: ' $verify
|
||||
export env_type=$2
|
||||
echo 'Environment type: ' $env_type
|
||||
export gpu_arch=$3
|
||||
echo 'GPU architecture: ' $gpu_arch
|
||||
export branch=$4
|
||||
export branch=$3
|
||||
echo 'Branch name: ' $branch
|
||||
export host_name=$5
|
||||
export host_name=$4
|
||||
echo 'Host name: ' $host_name
|
||||
|
||||
function print_log_header(){
|
||||
@@ -35,7 +32,7 @@ function print_log_header(){
|
||||
}
|
||||
|
||||
#run gemm tests
|
||||
export gemm_log="perf_gemm_${gpu_arch}.log"
|
||||
export gemm_log="perf_gemm.log"
|
||||
print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 0 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
@@ -55,9 +52,9 @@ print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
|
||||
export resnet256_log="perf_resnet50_N256.log"
|
||||
print_log_header $resnet256_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 | tee -a $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
|
||||
export resnet4_log="perf_resnet50_N4.log"
|
||||
print_log_header $resnet4_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 | tee -a $resnet4_log
|
||||
|
||||
Reference in New Issue
Block a user