From ef3be1d9cff77b35460d0e925a6498fb2e16b7b7 Mon Sep 17 00:00:00 2001 From: Illia Silin <98187287+illsilin@users.noreply.github.com> Date: Thu, 2 Mar 2023 09:24:31 -0800 Subject: [PATCH] Change the CI workflow. (#611) * add new parallel stage on navi node * dont run performance tests on navi, get rid of 9110 compiler * only run navi build when not doing QA * fix syntax * use navi21 label * dont stash profiler on navi nodes, scp deb package to ginger * disable tests on navi nodes * test posting a binary to ginger * add sshpass and use it to copy deb package * fix the scp example * fix syntax * debug the scp issues * add jenkins user to docker * dont try whoami * change jenkins uid and add user with uid=1002 * try scp from the last stage on micimaster * rename and stash the package, scp from micimaster [ROCm/composable_kernel commit: e6cda9f8ff8baa58d61905239a70f93db1933eb5] --- Dockerfile | 3 +++ Jenkinsfile | 54 ++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index dd2a97c7bd..b03cb836ad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,8 @@ ARG compiler_commit="" RUN set -xe ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/.apt_$ROCMVERSION/ +RUN useradd -rm -d /home/jenkins -s /bin/bash -u 1004 jenkins +RUN useradd -rm -d /home/manitera -s /bin/bash -u 1002 manitera # Add rocm repository RUN apt-get update RUN apt-get install -y wget gnupg @@ -37,6 +39,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow- python-dev \ python3-dev \ python3-pip \ + sshpass \ software-properties-common \ rocm-dev \ rocm-device-libs \ diff --git a/Jenkinsfile b/Jenkinsfile index 6b255ce13c..6bd6aa81b2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -14,7 +14,6 @@ def show_node_info() { def runShell(String command){ def responseCode = sh returnStatus: true, script: "${command} > tmp.txt" def output = readFile(file: "tmp.txt") - echo "tmp.txt contents: $output" return (output != "") } @@ -427,6 +426,7 @@ def Build_CK(Map conf=[:]){ def variant = env.STAGE_NAME def retimage + def navi_node = 0 gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') { try { @@ -440,6 +440,9 @@ def Build_CK(Map conf=[:]){ else{ echo "GPU is OK" } + if ( runShell('grep -n "gfx1030" clinfo.log') ){ + navi_node = 1 + } } } } @@ -458,6 +461,9 @@ def Build_CK(Map conf=[:]){ else{ echo "GPU is OK" } + if ( runShell('grep -n "gfx1030" clinfo.log') ){ + navi_node = 1 + } } } } @@ -466,16 +472,20 @@ def Build_CK(Map conf=[:]){ { cmake_build(conf) dir("build"){ - //run tests and examples - sh 'make -j check' - //we only need the ckProfiler to run the performance tests, so we pack and stash it - sh 'tar -zcvf ckProfiler.tar.gz bin/ckProfiler' - stash "ckProfiler.tar.gz" + if (navi_node == 0 ){ + //run tests and examples on all nodes except Navi + sh 'make -j check' + //we only need the ckProfiler to run the performance tests, so we pack and stash it + sh 'tar -zcvf ckProfiler.tar.gz bin/ckProfiler' + stash "ckProfiler.tar.gz" + } if (params.RUN_FULL_QA){ // build deb packages sh 'make -j package' archiveArtifacts artifacts: 'composablekernel-ckprofiler_*.deb' archiveArtifacts artifacts: 'composablekernel-tests_*.deb' + sh 'mv composablekernel-ckprofiler_*.deb ckprofiler_0.2.0_amd64.deb' + stash "ckprofiler_0.2.0_amd64.deb" } } } @@ -543,6 +553,8 @@ def process_results(Map conf=[:]){ unstash "perf_splitK_gemm.log" unstash "perf_onnx_gemm.log" sh "./process_qa_data.sh" + unstash "ckprofiler_0.2.0_amd64.deb" + sh "sshpass -p ${env.ck_deb_pw} scp -o StrictHostKeyChecking=no ckprofiler_0.2.0_amd64.deb ${env.ck_deb_user}@${env.ck_deb_ip}:/var/www/html/composable_kernel/" } else{ // unstash perf files to master @@ -564,7 +576,7 @@ def process_results(Map conf=[:]){ //launch develop branch daily at 23:00 UT in FULL_QA mode and at 19:00 UT with latest staging compiler version CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true - 0 21 * * * % RUN_FULL_QA=false;COMPILER_VERSION=release;COMPILER_COMMIT= + 0 21 * * * % COMPILER_VERSION=release;COMPILER_COMMIT= 0 19 * * * % BUILD_DOCKER=true;COMPILER_VERSION=amd-stg-open;COMPILER_COMMIT=''' : "" pipeline { @@ -653,12 +665,28 @@ pipeline { { parallel { - stage("Build CK and run Tests") + stage("Build CK and run Tests on MI100/MI200") { agent{ label rocmnode("gfx908 || gfx90a") } environment{ - setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx908;gfx90a;gfx1030" -DCMAKE_CXX_FLAGS="-O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" """ : """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx908;gfx90a;gfx1030" -DCMAKE_CXX_FLAGS="-O3 " """ }" - execute_args = "${params.COMPILER_VERSION == "ck-9110" ? """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx908;gfx90a;gfx1030" -DCMAKE_CXX_FLAGS="-O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ : """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx908,gfx90a;gfx1030" -DCMAKE_CXX_FLAGS="-O3" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ }" + setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx908;gfx90a" """ + execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx908,gfx90a" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ + } + steps{ + Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local') + } + } + stage("Build CK and run Tests on Navi") + { + when { + beforeAgent true + expression { !params.RUN_FULL_QA.toBoolean() } + } + agent{ label rocmnode("navi21") } + environment{ + setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1030" """ + execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx1030" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ + } steps{ Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local') @@ -671,7 +699,7 @@ pipeline { { parallel { - stage("Run ckProfiler: gfx908 or gfx90a") + stage("Run ckProfiler: gfx90*") { when { beforeAgent true @@ -680,7 +708,7 @@ pipeline { options { retry(2) } agent{ label rocmnode("gfx908 || gfx90a")} environment{ - setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -DGPU_TARGETS="gfx908;gfx90a;gfx1030" -DCMAKE_CXX_FLAGS=" -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -DBUILD_DEV=On """ : """ -DGPU_TARGETS="gfx908;gfx90a;gfx1030" -DCMAKE_CXX_FLAGS=" -O3 " -DBUILD_DEV=On """}" + setup_args = """ -DGPU_TARGETS="gfx908;gfx90a" -DBUILD_DEV=On """ } steps{ runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release') @@ -695,7 +723,7 @@ pipeline { options { retry(2) } agent{ label rocmnode("gfx90a")} environment{ - setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -DGPU_TARGETS="gfx90a" -DCMAKE_CXX_FLAGS=" -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -DBUILD_DEV=On """ : """ -DGPU_TARGETS="gfx90a" -DCMAKE_CXX_FLAGS=" -O3 " -DBUILD_DEV=On """}" + setup_args = """ -DGPU_TARGETS="gfx90a" -DBUILD_DEV=On """ } steps{ runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release')