mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Add a daily CI stage to test AITER with latest CK. (#2598)
* add a CI stage for AITER testing
[ROCm/composable_kernel commit: e6104daecc]
This commit is contained in:
17
Dockerfile.aiter
Normal file
17
Dockerfile.aiter
Normal file
@@ -0,0 +1,17 @@
|
||||
# Image used by the CI stage that tests AITER against the latest composable_kernel.
#
# BASE_DOCKER selects the base image (default: rocm/pytorch:latest).
#
# The single RUN step below:
#   1. makes sure a "render" group exists (groupadd -f succeeds if it already does);
#   2. installs the Python packages pandas, zmq, einops and pins numpy to 1.26.2;
#   3. creates /home/jenkins/workspace and clones ROCm/aiter (with submodules) into it;
#   4. replaces aiter's bundled 3rdparty/composable_kernel with a fresh clone of
#      ROCm/composable_kernel (whatever branch the remote HEAD points to);
#   5. installs aiter in development mode via setup.py;
#   6. hands the workspace to the jenkins user and opens its permissions;
#   7. adds the jenkins user to the "irc" group.
#
# Notes:
#   * mkdir -p creates both directory levels and does not fail when they already exist.
#   * No sudo prefix is used: groupadd and chown in this same step already require
#     root, so the step runs as root and sudo would only add a dependency on the
#     sudo binary being present in the base image.
#   * NOTE(review): chown/usermod assume a "jenkins" user already exists in the
#     base image -- confirm when changing BASE_DOCKER.
ARG BASE_DOCKER="rocm/pytorch:latest"
FROM $BASE_DOCKER
RUN groupadd -f render && \
    pip install pandas zmq einops && \
    pip install numpy==1.26.2 && \
    mkdir -p /home/jenkins/workspace && \
    cd /home/jenkins/workspace && \
    rm -rf aiter && \
    git clone --recursive https://github.com/ROCm/aiter.git && \
    cd aiter && \
    rm -rf 3rdparty/composable_kernel/ && \
    git clone https://github.com/ROCm/composable_kernel.git 3rdparty/composable_kernel/ && \
    python3 setup.py develop && \
    chown -R jenkins:jenkins /home/jenkins/workspace && \
    chmod -R a+rwx /home/jenkins/workspace && \
    usermod -aG irc jenkins
|
||||
79
Jenkinsfile
vendored
79
Jenkinsfile
vendored
@@ -188,12 +188,16 @@ def buildDocker(install_prefix){
|
||||
if(params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline" || params.COMPILER_COMMIT != ""){
|
||||
dockerArgs = dockerArgs + " --no-cache --build-arg BASE_DOCKER='${base_image_name}' -f Dockerfile.compiler . "
|
||||
}
|
||||
else if(params.RUN_AITER_TESTS){
|
||||
image_name = "rocm/composable_kernel:ck_aiter"
|
||||
dockerArgs = dockerArgs + " --no-cache -f Dockerfile.aiter . "
|
||||
}
|
||||
else{
|
||||
dockerArgs = dockerArgs + " -f Dockerfile . "
|
||||
}
|
||||
echo "Build Args: ${dockerArgs}"
|
||||
try{
|
||||
if(params.BUILD_DOCKER){
|
||||
if(params.BUILD_DOCKER || params.RUN_AITER_TESTS){
|
||||
//force building the new docker if that parameter is true
|
||||
echo "Building image: ${image_name}"
|
||||
retimage = docker.build("${image_name}", dockerArgs)
|
||||
@@ -807,13 +811,62 @@ def process_results(Map conf=[:]){
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Pulls the prebuilt CK/AITER docker image and runs the AITER op tests inside it.
 *
 * Steps:
 *   1. prints node info, sets HSA_ENABLE_SDMA=0 and checks out the SCM revision;
 *   2. builds the docker run flags, adding the host's numeric "video" and
 *      "render" group ids on top of the static flags;
 *   3. pulls rocm/composable_kernel:ck_aiter (reported to GitHub through
 *      gitStatusWrapper under the current stage name);
 *   4. inside the container, with a 45 minute timeout, runs python3 --version,
 *      rocminfo and the AITER test_gemm_a8w8_blockscale.py test.
 *
 * @param conf optional configuration map; accepted for signature parity but not
 *             read anywhere in this function.
 *
 * Fails the build via error() when the image cannot be pulled, and rethrows any
 * exception raised while the tests run.
 */
def run_aiter_tests(Map conf=[:]){
    show_node_info()
    env.HSA_ENABLE_SDMA=0
    checkout scm
    // image holding AITER built against the latest CK (see Dockerfile.aiter)
    def image = "rocm/composable_kernel:ck_aiter"
    def dockerOpts="--network=host --device=/dev/kfd --device=/dev/dri --group-add video --group-add render --group-add irc --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --user=jenkins -v=/var/jenkins/:/var/jenkins"
    def variant = env.STAGE_NAME
    def retimage
    // sh(returnStdout: true) returns the output including its trailing newline;
    // trim it so the ids can be spliced into the flag string cleanly.
    def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3').trim()
    def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3').trim()
    dockerOpts = dockerOpts + " --group-add=${video_id} --group-add=${render_id} "
    echo "Docker flags: ${dockerOpts}"

    gitStatusWrapper(credentialsId: "${env.ck_git_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') {
        try
        {
            echo "Pulling image: ${image}"
            retimage = docker.image("${image}")
            withDockerRegistry([ credentialsId: "ck_docker_cred", url: "" ]) {
                retimage.pull()
            }
        }
        catch(Exception ex)
        {
            // any failure while resolving/pulling the image aborts the stage
            error "Unable to locate image: ${image}"
        }
    }

    withDockerContainer(image: image, args: dockerOpts) {
        timeout(time: 45, unit: 'MINUTES'){
            try{
                sh "python3 --version"
                sh "rocminfo"
                // path is relative to the directory the step runs in; the aiter
                // checkout is expected one level above it
                sh "python3 ../aiter/op_tests/test_gemm_a8w8_blockscale.py"
                //sh "python3 ../aiter/op_tests/test_mha.py"
            }
            catch(e){
                echo "Throwing error exception while running AITER tests"
                echo 'Exception occurred: ' + e.toString()
                throw e
            }
            finally{
                echo "Finished running AITER tests"
            }
        }
    }
}
|
||||
|
||||
//launch develop branch daily jobs
|
||||
CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;DISABLE_DL_KERNELS=true;RUN_CK_TILE_FMHA_TESTS=true;RUN_CK_TILE_TRANSPOSE_TESTS=true;RUN_CK_TILE_GEMM_TESTS=true;RUN_TILE_ENGINE_GEMM_TESTS=true;RUN_PERFORMANCE_TESTS=true;RUN_ALL_UNIT_TESTS=true
|
||||
0 21 * * * % RUN_GROUPED_CONV_LARGE_CASES_TESTS=true;hipTensor_test=true;BUILD_GFX908=true;BUILD_GFX942=true;BUILD_GFX950=true;RUN_PERFORMANCE_TESTS=true;RUN_ALL_UNIT_TESTS=true
|
||||
0 19 * * * % BUILD_DOCKER=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false;NINJA_BUILD_TRACE=true;RUN_ALL_UNIT_TESTS=true
|
||||
0 17 * * * % BUILD_DOCKER=true;COMPILER_VERSION=amd-mainline;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false;NINJA_BUILD_TRACE=true;RUN_ALL_UNIT_TESTS=true
|
||||
0 15 * * * % BUILD_INSTANCES_ONLY=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
|
||||
0 13 * * * % BUILD_LEGACY_OS=true;USE_SCCACHE=false;RUN_PERFORMANCE_TESTS=false''' : ""
|
||||
0 13 * * * % RUN_AITER_TESTS=true;BUILD_LEGACY_OS=true;USE_SCCACHE=false;RUN_PERFORMANCE_TESTS=false''' : ""
|
||||
|
||||
pipeline {
|
||||
agent none
|
||||
@@ -952,6 +1005,10 @@ pipeline {
|
||||
name: "RUN_ALL_UNIT_TESTS",
|
||||
defaultValue: false,
|
||||
description: "Run all unit tests (default: OFF)")
|
||||
booleanParam(
|
||||
name: "RUN_AITER_TESTS",
|
||||
defaultValue: false,
|
||||
description: "Run AITER tests with latest CK develop branch (default: OFF)")
|
||||
}
|
||||
environment{
|
||||
dbuser = "${dbuser}"
|
||||
@@ -1032,6 +1089,24 @@ pipeline {
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Run AITER Tests")
|
||||
{
|
||||
parallel
|
||||
{
|
||||
stage("Run AITER Tests on gfx90a")
|
||||
{
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { params.RUN_AITER_TESTS.toBoolean() }
|
||||
}
|
||||
agent{ label rocmnode("gfx90a")}
|
||||
steps{
|
||||
run_aiter_tests()
|
||||
cleanWs()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Run Grouped Conv Large Case Tests")
|
||||
{
|
||||
parallel
|
||||
|
||||
Reference in New Issue
Block a user