From e377ca404bf9bb47a502d7f16984a3639a7e1337 Mon Sep 17 00:00:00 2001 From: Illia Silin <98187287+illsilin@users.noreply.github.com> Date: Wed, 7 Aug 2024 08:18:26 -0700 Subject: [PATCH] Run CK_TILE FMHA benchmarks and collect the performance data. (#1447) * run ck_tile benchmarks after the smoke tests and store logs * change the path of fmha benchmark logs * change the way of stashing ck_tile fmha logs * prevent the errors in stages where no logs are generated * fix the ck_tile fmha log names and headers * generate the fmha performance logs in the root folder * change Jenkins script arguments format * use exact file names for stashing * modify scripts to process FMHA performance results * unstash FMHA logs before parsing them [ROCm/composable_kernel commit: 12c1f68dd9fc7fa6d8d9998b1373c2e601d1c501] --- Jenkinsfile | 30 ++++++++++-- .../ck_tile/01_fmha/script/run_full_test.sh | 46 +++++++++++++++++++ script/process_perf_data.py | 14 ++++++ script/process_perf_data.sh | 17 +++++++ script/process_qa_data.sh | 17 +++++++ 5 files changed, 120 insertions(+), 4 deletions(-) create mode 100755 example/ck_tile/01_fmha/script/run_full_test.sh diff --git a/Jenkinsfile b/Jenkinsfile index b6df09bf8a..97f8764272 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -285,6 +285,19 @@ def cmake_build(Map conf=[:]){ if (package_build == true && (env.BRANCH_NAME == "develop" || env.BRANCH_NAME == "amd-master")) { archiveArtifacts artifacts: "build/*.deb", allowEmptyArchive: true, fingerprint: true } + if (params.RUN_CK_TILE_TESTS){ + try{ + archiveArtifacts "perf_fmha_fwd_*.log" + archiveArtifacts "perf_fmha_bwd_*.log" + stash name: "perf_fmha_fwd_gfx942.log" + stash name: "perf_fmha_bwd_gfx942.log" + stash name: "perf_fmha_fwd_gfx90a.log" + stash name: "perf_fmha_bwd_gfx90a.log" + } + catch(Exception err){ + echo "could not locate the requested artifacts: ${err.getMessage()}. will skip the stashing." 
+ } + } } def buildHipClangJob(Map conf=[:]){ @@ -612,6 +625,17 @@ def process_results(Map conf=[:]){ timeout(time: 1, unit: 'HOURS'){ try{ dir("script"){ + if (params.RUN_CK_TILE_TESTS){ + try{ + unstash "perf_fmha_fwd_gfx942.log" + unstash "perf_fmha_bwd_gfx942.log" + unstash "perf_fmha_fwd_gfx90a.log" + unstash "perf_fmha_bwd_gfx90a.log" + } + catch(Exception err){ + echo "could not locate the FMHA performance logs: ${err.getMessage()}." + } + } if (params.RUN_FULL_QA){ // unstash perf files to master unstash "ckprofiler_0.2.0_amd64.deb" @@ -852,8 +876,7 @@ pipeline { execute_args = """ ../script/cmake-ck-dev.sh ../ gfx90a && \ make -j64 tile_example_fmha_fwd tile_example_fmha_bwd && \ cd ../ && - example/ck_tile/01_fmha/script/smoke_test_fwd.sh && \ - example/ck_tile/01_fmha/script/smoke_test_bwd.sh""" + example/ck_tile/01_fmha/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx90a """ } steps{ buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args) @@ -872,8 +895,7 @@ pipeline { execute_args = """ ../script/cmake-ck-dev.sh ../ gfx942 && \ make -j64 tile_example_fmha_fwd tile_example_fmha_bwd && \ cd ../ && - example/ck_tile/01_fmha/script/smoke_test_fwd.sh && \ - example/ck_tile/01_fmha/script/smoke_test_bwd.sh""" + example/ck_tile/01_fmha/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx942 """ } steps{ buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args) diff --git a/example/ck_tile/01_fmha/script/run_full_test.sh b/example/ck_tile/01_fmha/script/run_full_test.sh new file mode 100755 index 0000000000..b5e6778aa5 --- /dev/null +++ b/example/ck_tile/01_fmha/script/run_full_test.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# +# in order to run this script you'd first need to build the tile_example_fmha_fwd and tile_example_fmha_bwd executables in ../build/bin/ +# 
+# run the script as "./run_full_test.sh <environment tag> <branch name> <host name> <gpu architecture>" +# input arguments: +# environment tag : a string describing the specifics of your test environment +# branch name : name of the branch in git repo (git status | grep -e 'On branch') +# host name : $hostname +# gpu architecture: e.g., gfx90a, or gfx942, etc. + +#get the command line arguments: +export env_type=$1 +echo 'Environment type: ' $env_type +export branch=$2 +echo 'Branch name: ' $branch +export host_name=$3 +echo 'Host name: ' $host_name +export GPU_arch=$4 +echo 'GPU_arch: ' $GPU_arch + +function print_log_header(){ + rm -f $1; + echo 'On branch ' $3 &> $1; + echo 'Node name: ' $4 >> $1; + #get GPU_arch and number of compute units from rocminfo + echo -n "GPU_arch: " >> $1; rocminfo | grep "Name:" | grep "gfx" >> $1; + rocminfo | grep "Compute Unit:" >> $1; + hipcc --version | grep -e 'HIP version' >> $1; + echo 'Environment type: ' $2 >> $1; + /opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> $1; +} + +#run verification tests +example/ck_tile/01_fmha/script/smoke_test_fwd.sh +example/ck_tile/01_fmha/script/smoke_test_bwd.sh + +#run performance benchmarks +export fmha_fwd_log="perf_fmha_fwd_$GPU_arch.log" +print_log_header $fmha_fwd_log $env_type $branch $host_name +example/ck_tile/01_fmha/script/benchmark_fwd.sh 2>&1 | tee -a $fmha_fwd_log + +export fmha_bwd_log="perf_fmha_bwd_$GPU_arch.log" +print_log_header $fmha_bwd_log $env_type $branch $host_name +example/ck_tile/01_fmha/script/benchmark_bwd.sh 2>&1 | tee -a $fmha_bwd_log + diff --git a/script/process_perf_data.py b/script/process_perf_data.py index 2c46da8fd2..c6cb6e05c7 100644 --- a/script/process_perf_data.py +++ b/script/process_perf_data.py @@ -143,6 +143,12 @@ def parse_logfile(logfile): if 'Best Perf' in line: lst=line.split() res.append(lst[36]) + elif 'perf_fmha' in logfile: + for line in open(logfile): + if 'TFlops' in line: + lst=line.split() + line_dict=dict(zip(lst[1:],lst)) + res.append(line_dict['TFlops,']) return res @@ -304,6 
+310,14 @@ def main(): for i in range(1,len(results)+1): testlist.append("Test%i"%i) table_name="ck_mixed_gemm_tflops" + if 'fmha_fwd' in filename: + for i in range(1,len(results)+1): + testlist.append("Test%i"%i) + table_name="ck_fmha_fwd_tflops" + if 'fmha_bwd' in filename: + for i in range(1,len(results)+1): + testlist.append("Test%i"%i) + table_name="ck_fmha_bwd_tflops" tflops_base = get_baseline(table_name,conn) store_new_test_result(table_name, results, testlist, branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment, conn) diff --git a/script/process_perf_data.sh b/script/process_perf_data.sh index 15fc5cb15f..af1e7e7a0d 100755 --- a/script/process_perf_data.sh +++ b/script/process_perf_data.sh @@ -13,3 +13,20 @@ python3 process_perf_data.py perf_gemm.log python3 process_perf_data.py perf_resnet50_N256.log python3 process_perf_data.py perf_resnet50_N4.log + +file=./perf_fmha_fwd_gfx942.log +if [ -e "$file" ]; then + python3 process_perf_data.py perf_fmha_fwd_gfx942.log +fi +file=./perf_fmha_bwd_gfx942.log +if [ -e "$file" ]; then + python3 process_perf_data.py perf_fmha_bwd_gfx942.log +fi +file=./perf_fmha_fwd_gfx90a.log +if [ -e "$file" ]; then + python3 process_perf_data.py perf_fmha_fwd_gfx90a.log +fi +file=./perf_fmha_bwd_gfx90a.log +if [ -e "$file" ]; then + python3 process_perf_data.py perf_fmha_bwd_gfx90a.log +fi diff --git a/script/process_qa_data.sh b/script/process_qa_data.sh index abf1e6234e..bf16f05cd0 100755 --- a/script/process_qa_data.sh +++ b/script/process_qa_data.sh @@ -21,3 +21,20 @@ python3 process_perf_data.py perf_gemm_bilinear.log python3 process_perf_data.py perf_reduction.log python3 process_perf_data.py perf_splitK_gemm.log python3 process_perf_data.py perf_onnx_gemm.log + +file=./perf_fmha_fwd_gfx942.log +if [ -e "$file" ]; then + python3 process_perf_data.py perf_fmha_fwd_gfx942.log +fi +file=./perf_fmha_bwd_gfx942.log +if [ -e "$file" ]; then + python3 process_perf_data.py perf_fmha_bwd_gfx942.log +fi 
+file=./perf_fmha_fwd_gfx90a.log +if [ -e "$file" ]; then + python3 process_perf_data.py perf_fmha_fwd_gfx90a.log +fi +file=./perf_fmha_bwd_gfx90a.log +if [ -e "$file" ]; then + python3 process_perf_data.py perf_fmha_bwd_gfx90a.log +fi