diff --git a/Jenkinsfile b/Jenkinsfile index becdc35b16..3cac20fd34 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,5 +1,5 @@ def rocmnode(name) { - return '(rocmtest || miopen) && ' + name + return '(rocmtest || miopen) && (' + name + ')' } def show_node_info() { @@ -7,6 +7,7 @@ def show_node_info() { echo "NODE_NAME = \$NODE_NAME" lsb_release -sd uname -r + cat /sys/module/amdgpu/version ls /opt/ -la """ } @@ -33,6 +34,10 @@ def runShell(String command){ def getDockerImageName(){ def img + if (params.USE_CUSTOM_DOCKER != ""){ + img = "${params.USE_CUSTOM_DOCKER}" + } + else{ if (params.ROCMVERSION != "6.0.1"){ if (params.COMPILER_VERSION == "") { img = "${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}" @@ -61,6 +66,7 @@ def getDockerImageName(){ } } } + } return img } @@ -365,8 +371,8 @@ def runCKProfiler(Map conf=[:]){ (retimage, image) = getDockerImage(conf) withDockerContainer(image: image, args: dockerOpts) { timeout(time: 5, unit: 'MINUTES'){ - sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log' - if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ + sh 'rocminfo | tee rocminfo.log' + if ( !runShell('grep -n "gfx" rocminfo.log') ){ throw new Exception ("GPU not found") } else{ @@ -379,20 +385,6 @@ def runCKProfiler(Map conf=[:]){ echo "The job was cancelled or aborted" throw e } - catch(Exception ex) { - retimage = docker.build("${image}", dockerArgs + " --no-cache .") - withDockerContainer(image: image, args: dockerOpts) { - timeout(time: 5, unit: 'MINUTES'){ - sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log' - if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ - throw new Exception ("GPU not found") - } - else{ - echo "GPU is OK" - } - } - } - } withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') { timeout(time: 24, unit: 'HOURS') @@ -473,6 +465,7 @@ def Build_CK(Map conf=[:]){ show_node_info() env.HSA_ENABLE_SDMA=0 + env.DOCKER_BUILDKIT=1 checkout scm def image = getDockerImageName() @@ -487,25 +480,35 @@ def Build_CK(Map conf=[:]){ if (params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){ dockerOpts = dockerOpts + " --env HIP_CLANG_PATH='/llvm-project/build/bin' " } + def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3') + def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3') + dockerOpts = dockerOpts + " --group-add=${video_id} --group-add=${render_id} " + echo "Docker flags: ${dockerOpts}" def variant = env.STAGE_NAME def retimage def navi_node = 0 + def mi300_node = 0 - gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') { + gitStatusWrapper(credentialsId: "${env.status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') { try { (retimage, image) = getDockerImage(conf) withDockerContainer(image: image, args: dockerOpts) { timeout(time: 5, unit: 'MINUTES'){ - sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log' - if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ + sh 'rocminfo | tee rocminfo.log' + if ( !runShell('grep -n "gfx" rocminfo.log') ){ throw new Exception ("GPU not found") } else{ echo "GPU is OK" } - if ( runShell('grep -n "gfx1030" clinfo.log') || runShell('grep -n "gfx1101" clinfo.log') ){ + if ( runShell('grep -n "gfx1030" rocminfo.log') || runShell('grep -n "gfx1101" rocminfo.log') ){ navi_node = 1 + echo "This is a Navi node" + } + if ( runShell('grep -n "gfx942" rocminfo.log') ){ + mi300_node = 1 + echo "This is MI300 node" } } } @@ -514,23 +517,6 @@ def Build_CK(Map conf=[:]){ echo "The job was cancelled or aborted" throw e } - catch(Exception ex) { - retimage = docker.build("${image}", dockerArgs + " --no-cache .") - withDockerContainer(image: image, args: dockerOpts) { - timeout(time: 5, unit: 'MINUTES'){ - sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo |tee clinfo.log' - if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ - throw new Exception ("GPU not found") - } - else{ - echo "GPU is OK" - } - if ( runShell('grep -n "gfx1030" clinfo.log') || runShell('grep -n "gfx1101" clinfo.log') ){ - navi_node = 1 - } - } - } - } withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') { timeout(time: 24, unit: 'HOURS') { @@ -544,8 +530,8 @@ def Build_CK(Map conf=[:]){ sh 'tar -zcvf ckProfiler.tar.gz bin/ckProfiler' stash "ckProfiler.tar.gz" } - if (params.RUN_FULL_QA){ - // build deb packages + if (params.RUN_FULL_QA && mi300_node == 0 ){ + // build deb packages for all MI100/200/300 targets and prepare to export sh 'make -j package' archiveArtifacts artifacts: 'composablekernel-ckprofiler_*.deb' archiveArtifacts artifacts: 'composablekernel-tests_*.deb' @@ -610,7 +596,7 @@ def process_results(Map conf=[:]){ def variant = env.STAGE_NAME def retimage - gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') { + gitStatusWrapper(credentialsId: "${env.status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') { try { (retimage, image) = getDockerImage(conf) } @@ -678,6 +664,10 @@ pipeline { name: "BUILD_DOCKER", defaultValue: false, description: "Force building docker image (default: false), set to true if docker image needs to be updated.") + string( + name: 'USE_CUSTOM_DOCKER', + defaultValue: '', + description: 'If you want to use a custom docker image, please scecify it here (default: OFF).') string( name: 'ROCMVERSION', defaultValue: '6.0', @@ -828,6 +818,26 @@ pipeline { cleanWs() } } + stage("Build CK and run Tests on MI300") + { + when { + beforeAgent true + expression { params.RUN_FULL_QA.toBoolean() } + } + agent{ label rocmnode("gfx942") } + environment{ + setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx942" -DCMAKE_CXX_FLAGS=" -O3 " """ + execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && \ + cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \ + -DGPU_TARGETS="gfx942" \ + -DCMAKE_CXX_COMPILER="${build_compiler()}" \ + -DCMAKE_CXX_FLAGS=" -O3 " .. && make -j """ + } + steps{ + Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local') + cleanWs() + } + } stage("Build CK and run Tests on MI100/MI200") { when {