From 8dd7973ffeb96ec7d141885828643bfcacc0ba62 Mon Sep 17 00:00:00 2001 From: Illia Silin <98187287+illsilin@users.noreply.github.com> Date: Mon, 23 Sep 2024 09:03:55 -0700 Subject: [PATCH] Add a daily CI build with legacy dockers. (#1525) * add an option to build CK with legacy dockers * change the custom docker settings * add environment variable for custom docker * use a new variable for legacy docker name * new way to pass docker names for legacy OS * add legacy docker check in the Build_CK function * change groovy syntax * add a check for legacy docker in getDockerImage * make sure the legacy docker name is not empty * remove the dumb-init call * disable the tests in legacy OS dockers * disable tests in legacy dockers * use a different way to disable tests in legacy dockers * rearrange the CI stages for legacy OS * use different way to disable tests in legacy dockers * update LD_LIBRARY_PATH for legacy dockers and add cron job * update LD_LIBRARY_PATH at docker launch * change the syntax for setting LD_LIBRARY_PATH [ROCm/composable_kernel commit: f16ebf82d42d49dc8905e5d6ac66eee8b25cd524] --- Jenkinsfile | 111 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 92 insertions(+), 19 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index cdc4b477b1..1e16b2f6f0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -100,7 +100,15 @@ def getDockerImage(Map conf=[:]){ dockerArgs = dockerArgs + " --no-cache " } echo "Docker Args: ${dockerArgs}" - def image = getDockerImageName() + def image + if ( params.BUILD_LEGACY_OS && conf.get("docker_name", "") != "" ){ + image = conf.get("docker_name", "") + echo "Using legacy docker: ${image}" + } + else{ + image = getDockerImageName() + echo "Using default docker: ${image}" + } //Check if image exists def retimage try @@ -125,7 +133,9 @@ def buildDocker(install_prefix){ def image_name = getDockerImageName() echo "Building Docker for ${image_name}" def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg 
PREFIX=${install_prefix} --build-arg CK_SCCACHE='${env.CK_SCCACHE}' --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' --build-arg DISABLE_CACHE='git rev-parse ${params.COMPILER_VERSION}' " - + if(params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){ + dockerArgs = dockerArgs + " --no-cache " + } echo "Build Args: ${dockerArgs}" try{ if(params.BUILD_DOCKER){ @@ -259,6 +269,7 @@ def cmake_build(Map conf=[:]){ """) sh cmd3 } + // reduce parallelism when compiling, clang uses too much memory def nt = nthreads() def cmd @@ -273,7 +284,7 @@ def cmake_build(Map conf=[:]){ } else{ setup_cmd = conf.get("setup_cmd", "${cmake_envs} cmake ${setup_args} .. ") - build_cmd = conf.get("build_cmd", "${build_envs} dumb-init make -j${nt} ${config_targets}") + build_cmd = conf.get("build_cmd", "${build_envs} make -j${nt} ${config_targets}") } cmd = conf.get("cmd", """ ${setup_cmd} @@ -292,8 +303,8 @@ def cmake_build(Map conf=[:]){ dir("build"){ //build CK sh cmd - //run tests - if(!setup_args.contains("NO_CK_BUILD")){ + //run tests except when NO_CK_BUILD or BUILD_LEGACY_OS are set + if(!setup_args.contains("NO_CK_BUILD") && !params.BUILD_LEGACY_OS){ if (setup_args.contains("gfx90a") && params.NINJA_BUILD_TRACE){ sh "/ninjatracing/ninjatracing .ninja_log > ck_build_trace.json" archiveArtifacts "ck_build_trace.json" @@ -330,7 +341,15 @@ def buildHipClangJob(Map conf=[:]){ env.HSA_ENABLE_SDMA=0 checkout scm - def image = getDockerImageName() + def image + if ( params.BUILD_LEGACY_OS && conf.get("docker_name", "") != "" ){ + image = conf.get("docker_name", "") + echo "Using legacy docker: ${image}" + } + else{ + image = getDockerImageName() + echo "Using default docker: ${image}" + } def prefixpath = conf.get("prefixpath", "/opt/rocm") // Jenkins is complaining about the render group @@ -512,7 +531,16 @@ def 
Build_CK(Map conf=[:]){ env.DOCKER_BUILDKIT=1 checkout scm - def image = getDockerImageName() + def image + if ( params.BUILD_LEGACY_OS && conf.get("docker_name", "") != "" ){ + image = conf.get("docker_name", "") + echo "Using legacy docker: ${image}" + } + else{ + image = getDockerImageName() + echo "Using default docker: ${image}" + } + def prefixpath = conf.get("prefixpath", "/opt/rocm") // Jenkins is complaining about the render group @@ -524,6 +552,9 @@ def Build_CK(Map conf=[:]){ if (params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){ dockerOpts = dockerOpts + " --env HIP_CLANG_PATH='/llvm-project/build/bin' " } + if(params.BUILD_LEGACY_OS){ + dockerOpts = dockerOpts + " --env LD_LIBRARY_PATH='/opt/Python-3.8.13/lib' " + } def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3') def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3') dockerOpts = dockerOpts + " --group-add=${video_id} --group-add=${render_id} " @@ -707,7 +738,8 @@ CRON_SETTINGS = BRANCH_NAME == "develop" ? 
'''0 23 * * * % RUN_FULL_QA=true;ROCM 0 21 * * * % ROCMVERSION=6.2;hipTensor_test=true 0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true 0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true - 0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false''' : "" + 0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false + 0 13 * * * % BUILD_LEGACY_OS=true ''' : "" pipeline { agent none @@ -794,6 +826,10 @@ pipeline { name: "NINJA_BUILD_TRACE", defaultValue: false, description: "Generate a ninja build trace (default: OFF)") + booleanParam( + name: "BUILD_LEGACY_OS", + defaultValue: false, + description: "Try building CK with legacy OS dockers: RHEL8 and SLES15 (default: OFF)") } environment{ dbuser = "${dbuser}" @@ -946,7 +982,6 @@ pipeline { { parallel { - stage("Run CK_TILE_GEMM Tests on gfx90a") { when { @@ -965,7 +1000,6 @@ pipeline { buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args) cleanWs() } - } stage("Run CK_TILE_GEMM Tests on gfx942") { @@ -988,15 +1022,54 @@ pipeline { } } } + stage("Build CK and run Tests") { parallel { + stage("Build CK with RHEL8") + { + when { + beforeAgent true + expression { params.BUILD_LEGACY_OS.toBoolean() } + } + agent{ label rocmnode("gfx90a") } + environment{ + def docker_name = "${env.CK_DOCKERHUB_PRIVATE}:ck_rhel8_rocm6.3" + setup_args = """ -DGPU_TARGETS="gfx942" \ + -DCMAKE_CXX_FLAGS=" -O3 " \ + -DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """ + execute_args = " " + } + steps{ + Build_CK_and_Reboot(setup_args: setup_args, config_targets: " ", no_reboot:true, build_type: 'Release', 
docker_name: docker_name) + cleanWs() + } + } + stage("Build CK with SLES15") + { + when { + beforeAgent true + expression { params.BUILD_LEGACY_OS.toBoolean() } + } + agent{ label rocmnode("gfx90a") } + environment{ + def docker_name = "${env.CK_DOCKERHUB_PRIVATE}:ck_sles15_rocm6.3" + setup_args = """ -DGPU_TARGETS="gfx942" \ + -DCMAKE_CXX_FLAGS=" -O3 " \ + -DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """ + execute_args = " " + } + steps{ + Build_CK_and_Reboot(setup_args: setup_args, config_targets: " ", no_reboot:true, build_type: 'Release', docker_name: docker_name) + cleanWs() + } + } stage("Build CK for all gfx9 targets") { when { beforeAgent true - expression { params.RUN_FULL_QA.toBoolean() } + expression { params.RUN_FULL_QA.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() } } agent{ label rocmnode("gfx90a") } environment{ @@ -1018,7 +1091,7 @@ pipeline { { when { beforeAgent true - expression { params.RUN_FULL_QA.toBoolean() } + expression { params.RUN_FULL_QA.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() } } agent{ label rocmnode("gfx942") } environment{ @@ -1038,7 +1111,7 @@ pipeline { { when { beforeAgent true - expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() } + expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() } } agent{ label rocmnode("gfx90a") } environment{ @@ -1058,7 +1131,7 @@ pipeline { { when { beforeAgent true - expression { params.BUILD_INSTANCES_ONLY.toBoolean() && !params.RUN_FULL_QA.toBoolean() } + expression { params.BUILD_INSTANCES_ONLY.toBoolean() && !params.RUN_FULL_QA.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() } } agent{ label rocmnode("gfx90a") } environment{ @@ -1077,7 +1150,7 @@ pipeline { { when { beforeAgent true - expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() } + expression { !params.RUN_FULL_QA.toBoolean() && 
!params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() } } agent{ label rocmnode("gfx1030") } environment{ @@ -1097,7 +1170,7 @@ pipeline { { when { beforeAgent true - expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() } + expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() } } agent{ label rocmnode("gfx1101") } environment{ @@ -1117,7 +1190,7 @@ pipeline { { when { beforeAgent true - expression { params.BUILD_GFX12.toBoolean() && !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() } + expression { params.BUILD_GFX12.toBoolean() && !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() } } agent{ label rocmnode("gfx1201") } environment{ @@ -1144,7 +1217,7 @@ pipeline { { when { beforeAgent true - expression { params.RUN_PERFORMANCE_TESTS.toBoolean() } + expression { params.RUN_PERFORMANCE_TESTS.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() } } options { retry(1) } agent{ label rocmnode("gfx90a")} @@ -1165,7 +1238,7 @@ pipeline { stage("Process results"){ when { beforeAgent true - expression { params.RUN_PERFORMANCE_TESTS.toBoolean() } + expression { params.RUN_PERFORMANCE_TESTS.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() } } agent { label 'mici' } steps{