diff --git a/Dockerfile.pytorch b/Dockerfile.pytorch
new file mode 100644
index 0000000000..1b71b00fbb
--- /dev/null
+++ b/Dockerfile.pytorch
@@ -0,0 +1,26 @@
+# CI image used to build PyTorch against a selected composable_kernel (CK) branch.
+ARG BASE_DOCKER="rocm/pytorch-nightly:latest"
+FROM $BASE_DOCKER
+# Branch of ROCm/composable_kernel cloned into each CK location of the PyTorch tree.
+ARG CK_PYTORCH_BRANCH="develop"
+# Set up the render group and the jenkins uid/gid, drop any previous PyTorch build
+# directory, replace every bundled composable_kernel checkout under /tmp/pytorch
+# with a fresh clone of $CK_PYTORCH_BRANCH, then hand the tree over to jenkins.
+# The first usermod/groupmod calls already run without sudo, so the last one does too.
+# "|| exit 1" makes a failure in any loop iteration abort the whole RUN step.
+RUN groupadd -g 109 render && \
+    usermod -u 1001 jenkins && \
+    groupmod -g 1001 jenkins && \
+    rm -rf /tmp/pytorch/build && \
+    for ck_parent in \
+        /tmp/pytorch/third_party \
+        /tmp/pytorch/third_party/aiter/3rdparty \
+        /tmp/pytorch/third_party/fbgemm/external \
+        /tmp/pytorch/third_party/flash-attention/csrc; do \
+        cd "$ck_parent" && \
+        rm -rf composable_kernel && \
+        git clone -b "$CK_PYTORCH_BRANCH" https://github.com/ROCm/composable_kernel.git || exit 1; \
+    done && \
+    chown -R jenkins:jenkins /tmp/pytorch && \
+    chmod -R a+rwx /tmp/pytorch && \
+    usermod -aG irc jenkins
diff --git a/Jenkinsfile b/Jenkinsfile
index 8842ce6814..e7e57aded9 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -192,12 +192,16 @@ def buildDocker(install_prefix){
         image_name = "rocm/composable_kernel:ck_aiter"
         dockerArgs = dockerArgs + " --no-cache -f Dockerfile.aiter --build-arg AITER_BRANCH='${params.aiter_branch}' --build-arg CK_AITER_BRANCH='${params.ck_aiter_branch}' . "
     }
-    else{
+    else if(params.RUN_PYTORCH_TESTS){
+        image_name = "rocm/composable_kernel:ck_pytorch"
+        dockerArgs = dockerArgs + " --no-cache -f Dockerfile.pytorch --build-arg CK_PYTORCH_BRANCH='${params.ck_pytorch_branch}' . "
+    }
+    else{
         dockerArgs = dockerArgs + " -f Dockerfile . "
     }
     echo "Build Args: ${dockerArgs}"
     try{
-        if(params.BUILD_DOCKER || params.RUN_AITER_TESTS){
+        if(params.BUILD_DOCKER || params.RUN_AITER_TESTS || params.RUN_PYTORCH_TESTS){
             //force building the new docker if that parameter is true
             echo "Building image: ${image_name}"
             retimage = docker.build("${image_name}", dockerArgs)
@@ -871,13 +875,68 @@ def run_aiter_tests(Map conf=[:]){
     }
 }
 
+
+// Builds PyTorch inside the rocm/composable_kernel:ck_pytorch image on the current node.
+// Pulls the image, then runs rocminfo, /tmp/pytorch/tools/amd_build/build_amd.py and
+// "setup.py develop" with USE_ROCM_CK_SDPA=1 for gfx942, all under a 45 minute timeout.
+// The conf argument is accepted but not read by this function.
+def run_pytorch_tests(Map conf=[:]){
+    show_node_info()
+    env.HSA_ENABLE_SDMA=0
+    checkout scm
+    //use the CK image built from Dockerfile.pytorch (based on rocm/pytorch-nightly by default)
+    def image = "rocm/composable_kernel:ck_pytorch"
+    def dockerOpts="--network=host --device=/dev/kfd --device=/dev/dri --group-add video --group-add render --group-add irc --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --user=jenkins -v=/var/jenkins/:/var/jenkins"
+    def variant = env.STAGE_NAME
+    def retimage
+    def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3').trim()
+    def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3').trim()
+    dockerOpts = dockerOpts + " --group-add=${video_id} --group-add=${render_id} "
+    echo "Docker flags: ${dockerOpts}"
+
+    gitStatusWrapper(credentialsId: "${env.ck_git_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') {
+        try
+        {
+            echo "Pulling image: ${image}"
+            retimage = docker.image("${image}")
+            withDockerRegistry([ credentialsId: "ck_docker_cred", url: "" ]) {
+                retimage.pull()
+            }
+        }
+        catch(Exception ex)
+        {
+            error "Unable to locate image: ${image}"
+        }
+    }
+
+    withDockerContainer(image: image, args: dockerOpts) {
+        timeout(time: 45, unit: 'MINUTES'){
+            try{
+                sh "rocminfo"
+                sh "python3 --version"
+                sh "python3 /tmp/pytorch/tools/amd_build/build_amd.py"
+                sh "USE_ROCM_CK_SDPA=1 PYTORCH_ROCM_ARCH=gfx942 python /tmp/pytorch/setup.py develop"
+            }
+            catch(e){
+                echo "Throwing error exception while building Pytorch"
+                echo 'Exception occurred: ' + e.toString()
+                throw e
+            }
+            finally{
+                echo "Finished building Pytorch"
+            }
+        }
+    }
+}
+
 //launch develop branch daily jobs
 CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;DISABLE_DL_KERNELS=true;RUN_CK_TILE_FMHA_TESTS=true;RUN_TILE_ENGINE_GEMM_TESTS=true;RUN_PERFORMANCE_TESTS=true;RUN_ALL_UNIT_TESTS=true
                                               0 21 * * * % RUN_GROUPED_CONV_LARGE_CASES_TESTS=true;hipTensor_test=true;BUILD_GFX908=true;BUILD_GFX942=true;BUILD_GFX950=true;RUN_PERFORMANCE_TESTS=true;RUN_ALL_UNIT_TESTS=true
                                               0 19 * * * % BUILD_DOCKER=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false;NINJA_BUILD_TRACE=true;RUN_ALL_UNIT_TESTS=true
                                               0 17 * * * % BUILD_DOCKER=true;COMPILER_VERSION=amd-mainline;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false;NINJA_BUILD_TRACE=true;RUN_ALL_UNIT_TESTS=true
                                               0 15 * * * % BUILD_INSTANCES_ONLY=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
-                                              0 13 * * * % RUN_AITER_TESTS=true;BUILD_LEGACY_OS=true;USE_SCCACHE=false;RUN_PERFORMANCE_TESTS=false''' : ""
+                                              0 13 * * * % RUN_AITER_TESTS=true;BUILD_LEGACY_OS=true;USE_SCCACHE=false;RUN_PERFORMANCE_TESTS=false
+                                              0 11 * * * % RUN_PYTORCH_TESTS=true;RUN_CODEGEN_TESTS=false;USE_SCCACHE=false;RUN_PERFORMANCE_TESTS=false;BUILD_GFX10=false;BUILD_GFX11=false;BUILD_GFX12=false;BUILD_GFX90A=false''' : ""
 
 pipeline {
     agent none
@@ -1012,6 +1071,14 @@ pipeline {
             name: "RUN_ALL_UNIT_TESTS",
             defaultValue: false,
             description: "Run all unit tests (default: OFF)")
+        booleanParam(
+            name: "RUN_PYTORCH_TESTS",
+            defaultValue: false,
+            description: "Try building PYTORCH with latest CK develop branch (default: OFF)")
+        string(
+            name: 'ck_pytorch_branch',
+            defaultValue: 'develop',
+            description: 'Specify which branch of CK to test with Pytorch (default: develop)')
         booleanParam(
             name: "RUN_AITER_TESTS",
             defaultValue: false,
@@ -1103,6 +1170,24 @@ pipeline {
                     }
                 }
             }
+        }
+        stage("Run Pytorch Tests")
+        {
+            parallel
+            {
+                stage("Run Pytorch Tests on gfx942")
+                {
+                    when {
+                        beforeAgent true
+                        expression { params.RUN_PYTORCH_TESTS.toBoolean() }
+                    }
+                    agent{ label rocmnode("gfx942")}
+                    steps{
+                        run_pytorch_tests()
+                        cleanWs()
+                    }
+                }
+            }
         }
         stage("Run AITER Tests")
         {