Files
composable_kernel/groovy/vars/ck.groovy
Bartłomiej Kocot 7c2b979de2 [rocm-libraries] ROCm/rocm-libraries#8573 (commit 04c9f1d)
[CK][CK Tile] Drop profiler for experimental builder codegen
 (#8573)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Motivation

Switch to dispatcher profiler for ck tile conv.

## Technical Details

- Switch to dispatcher profiler for ck tile conv.
- Drop profiler for experimental codegen
- Minor fixes for bwd data printing
- Minor fixes for 3d conv in dispatcher codegen

## Test Plan

test_grouped_conv*tile

## Test Result

Passed

## Submission Checklist

- [x] Look over the contributing guidelines at
https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
2026-06-19 09:38:44 +00:00

1542 lines
70 KiB
Groovy

// Look up the commit hash recorded on a build's SCMRevisionAction.
//   build - raw build object to inspect; null is tolerated.
// Returns the pull-request head hash (pullHash) for a GitHub PR revision, the
// revision hash for a plain git revision, and null when the build is null, has
// no SCMRevisionAction, or carries a revision of any other type.
@NonCPS
String getGitHubCommitHash(def build)
{
    // Safe navigation on both hops: a null build (or a null action list) must
    // yield null rather than a NullPointerException from calling find() on null.
    def scmAction = build?.actions?.find { action ->
        action instanceof jenkins.scm.api.SCMRevisionAction
    }
    def revision = scmAction?.revision
    if (revision instanceof org.jenkinsci.plugins.github_branch_source.PullRequestSCMRevision)
    {
        return revision.pullHash
    }
    if (revision instanceof jenkins.plugins.git.AbstractGitSCMSource$SCMRevisionImpl)
    {
        return revision.hash
    }
    return null
}
// Print identifying details of the current agent to the build log: node name,
// hostname, distribution description, kernel release, amdgpu module version
// and the contents of /opt.
def show_node_info() {
    def nodeInfoScript = """
echo "NODE_NAME = \$NODE_NAME"
hostname
lsb_release -sd
uname -r
cat /sys/module/amdgpu/version
ls /opt/ -la
"""
    sh(script: nodeInfoScript)
}
// Post a commit status to GitHub for the commit in env.GIT_COMMIT.
//   context     - status context shown on the commit/PR
//   state       - status state string sent to the API as-is
//   description - short human-readable text
// Best effort: the POST is retried up to 3 times and a final failure only logs
// a warning. A build abort is re-thrown and never downgraded to a warning.
def setGithubStatus(String context, String state, String description) {
    def sha = env.GIT_COMMIT
    if (!sha) {
        // Without a commit the URL would end in "/statuses/null"; skip the call.
        echo "WARNING: GIT_COMMIT is not set, skipping GitHub status (context=${context}, state=${state})"
        return
    }
    def targetUrl = env.RUN_DISPLAY_URL ?: env.BUILD_URL
    def statusUrl = "https://api.github.com/repos/ROCm/rocm-libraries/statuses/${sha}"

    // Serialize with JsonOutput and hand the body to curl through a file, so
    // quotes or apostrophes in context/description can neither break the JSON
    // nor escape the shell quoting.
    def payload = [state: state, context: context, description: description]
    if (targetUrl) {
        payload.target_url = targetUrl
    }
    def payloadFile = 'github_status_payload.json'
    writeFile(file: payloadFile, text: groovy.json.JsonOutput.toJson(payload))

    withCredentials([usernamePassword(credentialsId: 'github-app-miopen', usernameVariable: 'GITHUB_APP', passwordVariable: 'GITHUB_TOKEN')]) {
        def code = '0'
        try {
            retry(3) {
                code = sh(returnStdout: true, script: """
curl -s -w "%{http_code}" -o /dev/null -X POST '${statusUrl}' \\
-H "Authorization: token \$GITHUB_TOKEN" \\
-H 'Content-Type: application/json' \\
--data-binary @${payloadFile}
""").trim()
                if (!code.startsWith('2')) {
                    error("GitHub status POST returned ${code}")
                }
            }
        } catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e) {
            throw e // abort: never swallow
        } catch (Exception e) {
            echo "WARNING: GitHub status POST failed after retries (context=${context}, state=${state}, code=${code})"
        } finally {
            sh(returnStatus: true, script: "rm -f ${payloadFile}")
        }
    }
}
// Retry a flaky git network operation a few times with backoff. Handles
// momentary DNS/connectivity blips (e.g. "Could not resolve host: github.com")
// that would otherwise fail the whole build. Wrap each network-touching git
// step (ref-repo clone/update, SCM checkout) so a transient blip retries
// instead of failing the build. If all attempts fail, the node likely can't
// reach github at all, so escalate to a NodeFault: runOnHealthyNode then
// excludes this node and reruns the stage on another one.
//   label - short name of the operation, used in log lines and the NodeFault
//   body  - closure performing the git operation
// An abort (FlowInterruptedException) is re-thrown untouched: it must not be
// retried, nor turned into a NodeFault that would restart the stage elsewhere.
def gitNetRetry(String label, Closure body) {
    int maxAttempts = 3
    for (int i = 1; i <= maxAttempts; i++) {
        try { body(); return }
        catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e) {
            throw e // abort — never retry or reclassify
        }
        catch (e) {
            if (i == maxAttempts) {
                echo "${label} failed all ${maxAttempts} attempts on ${env.NODE_NAME}; treating as node fault to reroute to another node: ${e.message}"
                throw new org.ck.NodeFault("${label}: ${e.message}")
            }
            echo "${label} failed (attempt ${i}/${maxAttempts}) on ${env.NODE_NAME}, retrying in 15s: ${e.message}"
            sleep(time: 15, unit: 'SECONDS')
        }
    }
}
// Make sure the per-node rocm-libraries reference mirror exists under
// /var/jenkins/ref-repo and is up to date. Creates a mirror clone when the
// repo's refs directory is missing, then always prunes and updates remotes.
// Both phases run under one per-node lock and go through gitNetRetry.
def cloneUpdateRefRepo() {
    def refRepoPath = "/var/jenkins/ref-repo/rocm-libraries"
    def lockLabel = "git ref repo lock - ${env.NODE_NAME}"
    echo "locking on label: ${lockLabel}"
    lock(lockLabel) {
        // The existence check must happen while holding the lock. Checked
        // outside, two builds can both see "missing" and the second one would
        // rm -rf the mirror the first has just created (and may be using).
        def folderExists = sh(
            script: "test -d ${refRepoPath}/refs",
            returnStatus: true
        ) == 0
        if (!folderExists) {
            echo "rocm-libraries repo does not exist at ${refRepoPath}, creating mirror clone..."
            def cloneCommand = """
set -ex
rm -rf ${refRepoPath} && mkdir -p ${refRepoPath}
git clone --mirror https://github.com/ROCm/rocm-libraries.git ${refRepoPath}
"""
            gitNetRetry("clone ref repo") { sh(script: cloneCommand, label: "clone ref repo") }
            echo "Completed git clone"
        }
        echo "rocm-libraries repo exists at ${refRepoPath}, performing git remote update..."
        def fetchCommand = """
set -ex
cd ${refRepoPath}
git remote prune origin
git remote update
"""
        gitNetRetry("update ref repo") { sh(script: fetchCommand, label: "update ref repo") }
    }
    echo "Completed git ref repo fetch, lock released"
}
// Refresh the reference repo, check out the project, and record the commit
// under test in env.GIT_COMMIT.
def checkoutComposableKernel()
{
    cloneUpdateRefRepo()
    gitNetRetry("checkout scm") { checkout scm }

    // Preferred source: the SCMRevisionAction recorded before any local merge,
    // i.e. the true PR branch tip (pullHash) or branch HEAD (hash).
    def commit = getGitHubCommitHash(currentBuild.rawBuild)
    if (!commit) {
        // Fallback: ORIG_HEAD (pre-merge HEAD set by git merge), then HEAD for
        // branch builds where no merge occurred.
        commit = sh(returnStdout: true, script: '''
git rev-parse ORIG_HEAD 2>/dev/null || git rev-parse HEAD
''').trim()
    }
    env.GIT_COMMIT = commit
}
// Render a build-trace JSON artifact of this build into a PNG snapshot using
// the puppeteer image, archive the PNG, and post the PNG plus the trace
// (both base64-encoded) to the webhook stored in the
// 'ck_ci_build_perf_webhook_url' credential.
//   buildTraceFileName - artifact file name; must contain a "gfx..." token,
//                        which becomes the architecture name.
// Best effort: every failure is logged and swallowed, nothing is re-thrown.
def generateAndArchiveBuildTraceVisualization(String buildTraceFileName) {
    try {
        checkoutComposableKernel()
        // Retrieve the build trace artifact
        def traceFileExists = false
        try {
            copyArtifacts(
                projectName: env.JOB_NAME,
                selector: specific(env.BUILD_NUMBER),
                filter: buildTraceFileName
            )
            traceFileExists = fileExists(buildTraceFileName)
        } catch (Exception e) {
            echo "Could not copy build trace artifact: ${e.getMessage()}"
            return
        }
        // Stop here when the copy produced no file, rather than pulling the
        // image and failing later on the cp below.
        if (!traceFileExists) {
            echo "Build trace artifact ${buildTraceFileName} not found after copy, skipping visualization"
            return
        }
        sh """
echo "post artifact download:"
ls -la
"""
        // Pull image
        def image = "ghcr.io/puppeteer/puppeteer:24.30.0"
        echo "Pulling image: ${image}"
        def retimage = docker.image("${image}")
        retimage.pull()
        // Create a temporary workspace
        sh """#!/bin/bash
ls -la
mkdir -p workspace
cp ./projects/composablekernel/script/infra_helper/capture_build_trace.js ./workspace
cp ${buildTraceFileName} ./workspace/${buildTraceFileName}
chmod 777 ./workspace
ls -la ./workspace
"""
        // Run container to get snapshot
        def dockerOpts = "--cap-add=SYS_ADMIN -v \"\$(pwd)/workspace:/workspace\" -e NODE_PATH=/home/pptruser/node_modules -e BUILD_TRACE_FILE=${buildTraceFileName}"
        // Create unique image name by sanitizing job name
        def sanitizedJobName = env.JOB_NAME.replaceAll(/[\/\\:*?"<>| ]/, '_').replaceAll('%2F', '_')
        // Throws when the file name has no "gfx..." token; the outer catch logs it.
        def architectureName = (buildTraceFileName =~ /(gfx[0-9a-zA-Z]+)/)[0][1]
        def imageName = "perfetto_snapshot_${sanitizedJobName}_build_${env.BUILD_NUMBER}_${architectureName}.png"
        sh """
docker run --rm ${dockerOpts} ${image} node /workspace/capture_build_trace.js
mv ./workspace/perfetto_snapshot_build.png ./workspace/${imageName}
"""
        // Archive the snapshot
        sh """
mv ./workspace/${imageName} ${imageName}
"""
        archiveArtifacts "${imageName}"
        // Notify the channel
        withCredentials([string(credentialsId: 'ck_ci_build_perf_webhook_url', variable: 'WEBHOOK_URL')]) {
            sh '''
# Create build trace filename with build number based on the original filename
BUILD_TRACE_WITH_NUMBER=$(echo "''' + buildTraceFileName + '''" | sed 's/.json/_''' + sanitizedJobName + '''_''' + env.BUILD_NUMBER + '''_''' + architectureName + '''.json/')
# Convert image to base64
echo "Converting image to base64..."
IMAGE_BASE64=$(base64 -w 0 ''' + imageName + ''')
echo "Image base64 length: ${#IMAGE_BASE64}"
# Convert build trace to base64
echo "Converting build trace to base64..."
BUILD_TRACE_BASE64=$(base64 -w 0 ''' + buildTraceFileName + ''')
echo "Build trace base64 length: ${#BUILD_TRACE_BASE64}"
# Create JSON payload with base64 data
echo "Creating JSON payload..."
{
printf '{\n'
printf ' "jobName": "%s",\n' "''' + env.JOB_NAME + '''"
printf ' "buildNumber": "%s",\n' "''' + env.BUILD_NUMBER + '''"
printf ' "jobUrl": "%s",\n' "''' + env.RUN_DISPLAY_URL + '''"
printf ' "imageName": "%s",\n' "''' + imageName + '''"
printf ' "architecture": "%s",\n' "''' + architectureName + '''"
printf ' "imageData": "%s",\n' "$IMAGE_BASE64"
printf ' "buildTraceName": "%s",\n' "$BUILD_TRACE_WITH_NUMBER"
printf ' "buildTraceData": "%s"\n' "$BUILD_TRACE_BASE64"
printf '}\n'
} > webhook_payload.json
echo "JSON payload created, size: $(wc -c < webhook_payload.json) bytes"
curl -X POST "${WEBHOOK_URL}" \
-H "Content-Type: application/json" \
-d @webhook_payload.json
# Clean up temporary file
rm -f webhook_payload.json
'''
        }
    } catch (Exception e) {
        echo "Throwing error exception while generating build trace visualization"
        echo 'Exception occurred: ' + e.toString()
    }
}
// Number of build threads to use: the core count reported by `nproc`,
// capped at 64. Logs both the raw core count and the chosen value.
def nthreads() {
    def coreCount = sh(returnStdout: true, script: 'nproc')
    echo "Number of cores: ${coreCount}"
    def threads = Math.min(coreCount.toInteger(), 64)
    echo "Number of threads used for building: ${threads}"
    return threads
}
// Run `command` with stdout redirected to tmp.txt and report whether it
// printed anything. The exit status is requested only so a non-zero exit does
// not fail the step; it is otherwise ignored.
def runShell(String command){
    sh(returnStatus: true, script: "${command} > tmp.txt")
    def captured = readFile(file: "tmp.txt")
    return !captured.isEmpty()
}
// Decide whether CI should run for this build, based on which files under
// projects/composablekernel/ were touched.
// Returns true for base-branch builds, when no changed files are detected,
// when any changed file falls outside the skip patterns, or when the check
// itself fails; returns false only when every changed file matches a skip
// pattern.
def shouldRunCICheck() {
    // File patterns that should not trigger CI
    def skipFilePatterns = [
        /^projects\/composablekernel\/\.github\/.*/, // GitHub workflow files
        /^projects\/composablekernel\/docs\/.*/, // Documentation files
        /^projects\/composablekernel\/LICENSE$/, // License file
        /^projects\/composablekernel\/.*\.gitignore$/, // Git ignore files
        /^projects\/composablekernel\/.*\.md$/ // Markdown files
    ]
    try {
        // Base branch builds always run.
        def baseBranch = "develop"
        if (env.CHANGE_ID == null && env.BRANCH_NAME == baseBranch) {
            echo "Base branch (${baseBranch}) build detected - always running CI for safety"
            return true
        }

        // Every file touched in any commit of the range, even if later reverted.
        def gitOutput = sh(
            returnStdout: true,
            script: '''
BASE_BRANCH="develop"
if [ "$CHANGE_ID" != "" ]; then
# For PR builds, get all files touched in any commit
echo "PR build detected, checking all touched files against origin/$CHANGE_TARGET" >&2
git log --name-only --pretty=format: origin/$CHANGE_TARGET..HEAD -- projects/composablekernel/ | sort -u | grep -v '^$' || true
else
# For feature branch builds, compare against merge-base with base branch
MERGE_BASE=$(git merge-base HEAD origin/$BASE_BRANCH 2>/dev/null || echo "HEAD~1")
echo "Branch build detected, checking all touched files since merge-base: $MERGE_BASE" >&2
git log --name-only --pretty=format: $MERGE_BASE..HEAD -- projects/composablekernel/ | sort -u | grep -v '^$' || true
fi
'''
        )
        def changedFiles = gitOutput.trim().split('\n')
        if (changedFiles.size() == 1 && changedFiles[0] == '') {
            echo "No changed files detected - this might be a manual trigger or merge commit, running CI for safety"
            return true
        }
        echo "Changed files: ${changedFiles.join(', ')}"

        // Sort each file into "needs CI" or "skippable".
        def filesRequiringCI = []
        def skippedFiles = []
        for (String file : changedFiles) {
            boolean skippable = false
            for (pattern in skipFilePatterns) {
                if (file ==~ pattern) {
                    skippable = true
                    break
                }
            }
            if (skippable) {
                skippedFiles.add(file)
            } else {
                filesRequiringCI.add(file)
            }
        }

        // Debug output
        if (skippedFiles.size() > 0) {
            echo "Files that don't require CI (${skippedFiles.size()}):"
            for (name in skippedFiles) {
                echo " - ${name}"
            }
        }
        if (filesRequiringCI.size() == 0) {
            echo "Only non-relevant files changed, skipping CI"
            return false
        }
        echo "Files that require CI (${filesRequiringCI.size()}):"
        for (name in filesRequiringCI) {
            echo " - ${name}"
        }
        return true
    } catch (Exception e) {
        echo "Error checking changed files: ${e.getMessage()}, running CI by default"
        return true
    }
}
// Name of the base docker image: the USE_CUSTOM_DOCKER parameter when it is
// not the empty string, otherwise "<CK_DOCKERHUB>:ck_ub24.04_rocm<ROCMVERSION>".
def getBaseDockerImageName(){
    return (params.USE_CUSTOM_DOCKER != "") ?
        "${params.USE_CUSTOM_DOCKER}" :
        "${env.CK_DOCKERHUB}:ck_ub24.04_rocm${params.ROCMVERSION}"
}
// Name of the docker image to build with.
// Returns USE_CUSTOM_DOCKER when it is not the empty string. Otherwise returns
// the base image name, suffixed with "_<COMPILER_VERSION>" when that parameter
// is set, and additionally with "_<commit prefix>" (at most 7 characters of
// COMPILER_COMMIT) when a compiler commit is given.
def getDockerImageName(){
    def base_name = getBaseDockerImageName()
    if (params.USE_CUSTOM_DOCKER != ""){
        return "${params.USE_CUSTOM_DOCKER}"
    }
    if (params.COMPILER_VERSION == "") {
        return "${base_name}"
    }
    if (params.COMPILER_COMMIT == ""){
        return "${base_name}_${params.COMPILER_VERSION}"
    }
    // take(7) instead of [0..6]: a commit id shorter than 7 characters is used
    // whole instead of raising an index-out-of-bounds error.
    def commit = "${params.COMPILER_COMMIT}".toString().take(7)
    return "${base_name}_${params.COMPILER_VERSION}_${commit}"
}
// Gate evaluated before sccache is enabled. Returns 1 when env.CK_SCCACHE is
// unset; otherwise makes the workspace writable, runs a ping pipeline that
// writes to tmp.txt, and returns whether the file content differs from "0".
def check_host() {
    if ("${env.CK_SCCACHE}" != "null"){
        def SCCACHE_SERVER="${env.CK_SCCACHE.split(':')[0]}"
        echo "sccache server: ${SCCACHE_SERVER}"
        sh "chmod +w -R ${env.WORKSPACE}"
        // NOTE(review): this probe does not appear to test reachability.
        //  - The script is a single-quoted Groovy string, so ${SCCACHE_SERVER}
        //    is expanded by the shell, not from the Groovy variable above.
        //  - `$?` sits on the right-hand side of the pipe, so it is presumably
        //    not ping's exit status.
        //  - readFile keeps echo's trailing newline, so comparing with "0"
        //    looks like it can never be equal.
        // Left untouched on purpose: callers currently always get a truthy
        // result, and tightening the check would change when sccache is used.
        // Confirm the intended behavior before fixing.
        sh '''ping -c 1 -p 6379 "${SCCACHE_SERVER}" | echo $? > tmp.txt'''
        def output = readFile(file: "tmp.txt")
        // Interpolate the value; the escaped form printed the literal "$output".
        echo "tmp.txt contents: ${output}"
        return (output != "0")
    }
    else{
        return 1
    }
}
// Detect the GPU architecture of the current node from `rocminfo` output
// (also saved to rocminfo.log). Patterns are tried in the listed order and
// the name paired with the first one found is returned; "" when none match.
def check_arch_name(){
    sh 'rocminfo | tee rocminfo.log'
    // [substring searched in rocminfo.log, architecture name returned]
    def probes = [
        ['gfx90a', 'gfx90a'],
        ['gfx942', 'gfx942'],
        ['gfx101', 'gfx101'],
        ['gfx103', 'gfx103'],
        ['gfx11',  'gfx11'],
        ['gfx120', 'gfx12'],
        ['gfx908', 'gfx908'],
        ['gfx950', 'gfx950']
    ]
    for (probe in probes) {
        if (runShell('grep -n "' + probe[0] + '" rocminfo.log')) {
            return probe[1]
        }
    }
    return ""
}
// Legacy image pull, retained so open PRs that still call it through
// ck.groovy wrappers keep working. Prefer pullImage() in new code: it
// distinguishes NodeFault/TransientFault, whereas every failure here is
// reported as "Unable to locate image".
//   conf.docker_name - optional image override; the default image name is
//                      used when it is the empty string or absent.
// Returns [image handle, image name].
def getDockerImage(Map conf=[:]){
    def image
    def requested = conf.get("docker_name", "")
    if (requested != "") {
        image = requested
        echo "Using special docker: ${image}"
    } else {
        image = getDockerImageName()
        echo "Using default docker: ${image}"
    }

    // Pulling doubles as the existence check.
    def retimage
    try {
        echo "Pulling image: ${image}"
        retimage = docker.image("${image}")
        withDockerRegistry([ credentialsId: "ck_docker_cred", url: "" ]) {
            retimage.pull()
        }
    } catch (Exception ex) {
        error "Unable to locate image: ${image}"
    }
    return [retimage, image]
}
// ---------------------------------------------------------------------------
// Node-health retry. See docs/node_health_ci_resilience.md
// ---------------------------------------------------------------------------
// Cheap host-level probes — exit status is the answer, no log parsing.
// True when `docker info` exits with status 0 on this host.
def daemonUp() {
    echo "Preflight: checking docker daemon"
    def status = sh(returnStatus: true, script: 'docker info >/dev/null 2>&1')
    return status == 0
}
// True when /sys/module/amdgpu/version exists on this host.
def driverUp() {
    echo "Preflight: checking amdgpu driver"
    def status = sh(returnStatus: true, script: 'test -e /sys/module/amdgpu/version')
    return status == 0
}
// True when /dev/kfd exists and at least one /dev/dri/renderD* entry is listed.
def devicesUp() {
    echo "Preflight: checking GPU devices"
    def status = sh(returnStatus: true, script: 'test -e /dev/kfd && ls /dev/dri/renderD* >/dev/null 2>&1')
    return status == 0
}
// True when a "probe" directory can be created under $SCCACHE_DIR
// (default /.cache/sccache) on this host.
def cacheWritable() {
    def status = sh(returnStatus: true, script: 'D=${SCCACHE_DIR:-/.cache/sccache}; mkdir -p "$D/probe" 2>/dev/null')
    return status == 0
}
// True when the filesystem holding `path` has at least `minGb` gigabytes
// available according to `df`.
//   path  - directory to check (default /var/jenkins/workspace)
//   minGb - minimum free space in GB (default 5)
// Returns false, instead of throwing NumberFormatException, when df yields no
// number (for example because the path does not exist), so that the caller
// can treat the node as unfit rather than fail with an untyped error.
def diskOk(String path='/var/jenkins/workspace', int minGb=5) {
    echo "Preflight: checking disk space on ${path} (minimum ${minGb}GB)"
    def avail = sh(returnStdout:true, script:"df --output=avail -BG ${path} | tail -1 | tr -dc '0-9'").trim()
    if (!avail.isInteger()) {
        echo "Preflight: could not determine free space on ${path} (df output: '${avail}'), treating as insufficient"
        return false
    }
    return avail.toInteger() >= minGb
}
// Probe run inside a container: true when `rocminfo` executed in `image`
// with /dev/kfd and /dev/dri passed through prints a line containing "gfx".
// The image must already be pulled and authenticated before calling.
def gpuUsable(String image) {
    def status = sh(returnStatus: true, script: "docker run --rm --device=/dev/kfd --device=/dev/dri ${image} rocminfo 2>/dev/null | grep -q gfx")
    return status == 0
}
// Host-only health gate for the current agent; needs no image. Throws a
// NodeFault naming the first failed check (docker daemon, amdgpu driver, GPU
// device nodes, free disk space, in that order). Image, registry and
// container faults are classified later, in the body, by pullImage and the
// in-container GPU check, where the correct conf is available.
def preflight() {
    echo "Preflight: starting node health checks on ${env.NODE_NAME}"
    if (!daemonUp()) {
        throw new org.ck.NodeFault('docker-daemon-down')
    }
    if (!driverUp()) {
        throw new org.ck.NodeFault('driver-not-loaded')
    }
    if (!devicesUp()) {
        throw new org.ck.NodeFault('gpu-devices-missing')
    }
    if (!diskOk()) {
        throw new org.ck.NodeFault('disk-space-low')
    }
    // The sccache directory is deliberately not probed: sccache runs inside
    // the container, so /.cache/sccache on the host is always root-owned and
    // a host-level mkdir probe would fail on every node (false NodeFault).
    echo "Preflight: all checks passed on ${env.NODE_NAME}"
}
// Pull the build image and classify any failure:
//   docker daemon down (before or after the pull) -> NodeFault
//   image manifest not found                      -> error(), a config problem, no retry
//   any other pull failure                        -> TransientFault
//   abort (FlowInterruptedException)              -> re-thrown unchanged
// Uses the same credentials and plugin pull as getDockerImage and returns the
// same [retimage, image] pair.
//   conf.docker_name - optional image override; the default name is used otherwise.
def pullImage(Map conf=[:]) {
    def image = conf.get("docker_name", "") ?: getDockerImageName()
    echo "Pulling image: ${image}"
    if (!daemonUp()) {
        throw new org.ck.NodeFault('docker-daemon-down')
    }
    def retimage = docker.image("${image}")
    try {
        withDockerRegistry([ credentialsId: "ck_docker_cred", url: "" ]) {
            retimage.pull()
        }
    }
    catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e) {
        // abort — never reclassify
        throw e
    }
    catch (Exception e) {
        // daemon died mid-pull
        if (!daemonUp()) {
            throw new org.ck.NodeFault('docker-daemon-down')
        }
        def manifestStatus
        withDockerRegistry([ credentialsId: "ck_docker_cred", url: "" ]) {
            manifestStatus = sh(returnStatus:true, script:"docker manifest inspect --insecure ${image} >/dev/null 2>&1")
        }
        if (manifestStatus != 0) {
            // config error: no retry
            error("image not found: ${image}")
        }
        // image exists, pull failed: glitch
        throw new org.ck.TransientFault('registry-pull')
    }
    return [retimage, image]
}
// Compose a node label expression that matches `label` but none of `nodes`.
// With no nodes to exclude (null or empty) the label is returned as is;
// otherwise the result is "(<label>) && !<n1> && !<n2> ...".
def exclude(String label, List nodes) {
    if (nodes == null || nodes.isEmpty()) {
        return label
    }
    def negations = nodes.collect { name -> "!" + name }.join(" && ")
    return "(${label}) && ${negations}"
}
// Run `body` on the current node, re-running it only after a TransientFault.
// The body is attempted at most `maxAttempts` times (always at least once);
// the last TransientFault is re-thrown. NodeFault and every untyped exception
// propagate immediately so the outer loop can move to a different node.
def runInPlace(Closure body, int maxAttempts) {
    int failures = 0
    for (;;) {
        try {
            body()
            return
        }
        catch (org.ck.TransientFault fault) {
            failures += 1
            if (failures >= maxAttempts) {
                throw fault
            }
            echo "transient fault, retry ${failures}/${maxAttempts}: ${fault.message}"
        }
    }
}
// Allocate a node matching `label`, run preflight() on it, then run `body`
// with in-place retries for transient faults. A NodeFault, or a
// TransientFault that outlasts the in-place retries, excludes that node and
// tries another one, for up to 3 node attempts in total; after that the build
// errors out. Aborts and real build failures are not caught here and
// propagate without retry.
def runOnHealthyNode(String label, Closure body) {
    int transientRetries = 2 // glitch retries on the SAME node before moving on
    int nodeAttempts = 3 // how many DIFFERENT nodes to try before giving up
    def excluded = []
    int attempt = 0
    while (attempt < nodeAttempts) {
        def attemptNode = null
        def ordinal = attempt + 1
        try {
            node(exclude(label, excluded)) {
                attemptNode = env.NODE_NAME
                echo "Node attempt ${ordinal}/${nodeAttempts} on ${attemptNode}"
                preflight()
                runInPlace(body, transientRetries)
            }
            return
        }
        catch (org.ck.NodeFault e) {
            echo "Node attempt ${ordinal}/${nodeAttempts} failed (node fault on ${attemptNode}): ${e.message}"
            excluded << attemptNode
        }
        catch (org.ck.TransientFault e) {
            echo "Node attempt ${ordinal}/${nodeAttempts} failed (glitch outlasted retries on ${attemptNode}): ${e.message}"
            excluded << attemptNode
        }
        // FlowInterruptedException (abort) and real build errors: propagate, no retry.
        // buildAndTest sets failure status for real failures; abort needs no status update.
        attempt++
    }
    error("exhausted ${nodeAttempts} nodes: ${excluded.join(', ')}")
}
// Build and push a docker image.
//   install_prefix  - passed to the build as --build-arg PREFIX
//   image_name      - tag to build and push
//   dockerExtraArgs - extra `docker build` arguments (Dockerfile, context, ...)
//   forceBuild      - when false, the build is skipped if `docker manifest
//                     inspect` already finds the image in the registry
// Returns image_name when the build was skipped, otherwise the first repo
// digest of the freshly pushed image. Callers store the result themselves.
def buildAndPushDockerImage(String install_prefix, String image_name, String dockerExtraArgs, boolean forceBuild){
    show_node_info()
    env.DOCKER_BUILDKIT=1
    checkoutComposableKernel()
    def dockerArgs = "--build-arg PREFIX=${install_prefix} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
    dockerArgs += " " + dockerExtraArgs
    if(!forceBuild){
        try{
            echo "Checking for image: ${image_name}"
            withDockerRegistry([ credentialsId: "ck_docker_cred", url: "" ]) {
                sh "docker manifest inspect --insecure ${image_name}"
            }
            echo "Image: ${image_name} found! Skipping building image"
            return image_name
        }
        catch(org.jenkinsci.plugins.workflow.steps.FlowInterruptedException ex){
            // An abort during the check must stop the stage, not start a build.
            throw ex
        }
        catch(Exception ex){
            echo "Unable to locate image: ${image_name}. Will attempt to build image now."
        }
    }
    echo "Building image: ${image_name} with args: ${dockerArgs}"
    def retimage = docker.build("${image_name}", dockerArgs)
    withDockerRegistry([ credentialsId: "ck_docker_cred", url: "" ]) {
        retimage.push()
    }
    def digest = sh(returnStdout: true, script: "docker inspect --format='{{index .RepoDigests 0}}' ${image_name}").trim()
    echo "Built image digest: ${digest}"
    echo "Pruning dangling Docker images to free disk space on CI agent"
    sh "docker image prune -f --filter 'dangling=true' || true"
    return digest
}
// Build (or reuse) the base CK image and store the result in env.CK_BASE_IMAGE.
// The Dockerfile and flags depend on COMPILER_VERSION / COMPILER_COMMIT;
// BUILD_DOCKER decides whether the build is forced.
def buildDockerBase(install_prefix){
    def image_name = getDockerImageName()
    def base_image_name = getBaseDockerImageName()
    echo "Building Docker for ${image_name}"

    def customCompiler = params.COMPILER_VERSION == "develop" ||
                         params.COMPILER_VERSION == "amd-staging" ||
                         params.COMPILER_COMMIT != ""
    def dockerExtraArgs
    if (customCompiler) {
        dockerExtraArgs = " --no-cache --build-arg BASE_DOCKER='${base_image_name}' -f projects/composablekernel/Dockerfile.compiler . "
    } else if (params.COMPILER_VERSION == "therock") {
        dockerExtraArgs = " --no-cache -f projects/composablekernel/Dockerfile . "
    } else {
        dockerExtraArgs = " -f projects/composablekernel/Dockerfile . "
    }

    def forceBuild = params.BUILD_DOCKER.toBoolean()
    env.CK_BASE_IMAGE = buildAndPushDockerImage(install_prefix, image_name, dockerExtraArgs, forceBuild)
}
// Always rebuild the "ck_pytorch" image from Dockerfile.pytorch and store the
// result in env.CK_PYTORCH_IMAGE.
def buildDockerPytorch(install_prefix){
    def pytorchImage = "${env.CK_DOCKERHUB_PRIVATE}:ck_pytorch"
    def buildArgs = " --no-cache -f projects/composablekernel/Dockerfile.pytorch --build-arg CK_PYTORCH_BRANCH='${params.ck_pytorch_branch}' . "
    def built = buildAndPushDockerImage(install_prefix, pytorchImage, buildArgs, true)
    env.CK_PYTORCH_IMAGE = built
}
// Always rebuild the "ck_aiter" image from Dockerfile.aiter and store the
// result in env.CK_AITER_IMAGE.
def buildDockerAiter(install_prefix){
    def aiterImage = "${env.CK_DOCKERHUB_PRIVATE}:ck_aiter"
    def buildArgs = " --no-cache -f projects/composablekernel/Dockerfile.aiter --build-arg AITER_BRANCH='${params.aiter_branch}' --build-arg CK_AITER_BRANCH='${params.ck_aiter_branch}' . "
    def built = buildAndPushDockerImage(install_prefix, aiterImage, buildArgs, true)
    env.CK_AITER_IMAGE = built
}
// Always rebuild the "ck_fa" image from Dockerfile.fa for gfx942 and gfx950
// and store the result in env.CK_FA_IMAGE.
def buildDockerFa(install_prefix){
    def faImage = "${env.CK_DOCKERHUB_PRIVATE}:ck_fa"
    // Each fragment carries its own leading space; they are joined as they are.
    def buildArgs = [
        " --no-cache -f projects/composablekernel/Dockerfile.fa",
        " --build-arg BASE_DOCKER='${params.fa_base_docker}'",
        " --build-arg FA_BRANCH='${params.fa_branch}'",
        " --build-arg CK_FA_BRANCH='${params.ck_fa_branch}'",
        " --build-arg GPU_ARCHS='gfx942;gfx950'",
        " . "
    ].join('')
    env.CK_FA_IMAGE = buildAndPushDockerImage(install_prefix, faImage, buildArgs, true)
}
// Build the base image, then each optional test image whose RUN_*_TESTS
// parameter is enabled, in the order pytorch, aiter, fa.
def buildDocker(install_prefix){
    buildDockerBase(install_prefix)

    def wantPytorch = params.RUN_PYTORCH_TESTS.toBoolean()
    if (wantPytorch) buildDockerPytorch(install_prefix)

    def wantAiter = params.RUN_AITER_TESTS.toBoolean()
    if (wantAiter) buildDockerAiter(install_prefix)

    def wantFa = params.RUN_FA_TESTS.toBoolean()
    if (wantFa) buildDockerFa(install_prefix)
}
// Assemble the `docker run` arguments for build/test containers.
// GPU device nodes are passed through unless BUILD_INSTANCES_ONLY is set;
// extra environment is added for non-default compilers; the host's numeric
// video/render group ids are added when they can be resolved; and the
// reference repo directory is always mounted.
// Returns the argument string.
def get_docker_options(){
    def dockerOpts
    if ( params.BUILD_INSTANCES_ONLY ){
        dockerOpts = "--network=host --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
    }
    else{ // only add kfd and dri paths if you are actually going to run something on GPUs
        dockerOpts = "--network=host --device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
    }
    if (params.COMPILER_VERSION == "develop" || params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "therock" || params.COMPILER_COMMIT != ""){
        // the --env COMPRESSED_BUNDLE_FORMAT_VERSION=2 env variable is required when building code with offload-compress flag with
        // newer clang22 compilers and running with older hip runtime libraries
        dockerOpts = dockerOpts + " --env HIP_CLANG_PATH='/llvm-project/build/bin' --env COMPRESSED_BUNDLE_FORMAT_VERSION=2 --env HIP_PLATFORM=amd "
    }
    // on some machines the group ids for video and render groups may not be the same as in the docker image!
    // trim(): returnStdout keeps the trailing newline, which would otherwise be
    // embedded in the middle of the argument string.
    def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3').trim()
    def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3').trim()
    // Skip a group the host does not have, rather than emit a bare "--group-add=".
    if (video_id) {
        dockerOpts = dockerOpts + " --group-add=${video_id}"
    }
    if (render_id) {
        dockerOpts = dockerOpts + " --group-add=${render_id}"
    }
    dockerOpts = dockerOpts + " -v /var/jenkins/ref-repo/:/var/jenkins/ref-repo/ "
    echo "Docker flags: ${dockerOpts}"
    return dockerOpts
}
// Compose (but do not run) the single-line shell command that configures and
// builds the client examples for `arch` against the CK install tree in the
// workspace, using params.BUILD_COMPILER.
def build_client_examples(String arch){
    def prefixPath = "${env.WORKSPACE}/projects/composablekernel/install;/opt/rocm"
    def compiler = params.BUILD_COMPILER
    // Joined with single spaces; the leading and trailing blanks are part of
    // the resulting command string.
    def fragments = [
        ' cd ../client_example && rm -rf build && mkdir build && cd build &&',
        "cmake -DCMAKE_PREFIX_PATH=\"${prefixPath}\"",
        "-DGPU_TARGETS=\"${arch}\"",
        "-DCMAKE_CXX_COMPILER=\"${compiler}\"",
        "-DCMAKE_HIP_COMPILER=\"${compiler}\"",
        '-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j '
    ]
    return fragments.join(' ')
}
// Compose (but do not run) the shell command that configures with Ninja,
// builds the two tile_example_fmha targets, and then runs run_full_test.sh
// with the compiler version, branch name, node name and `arch`.
def build_and_run_fmha(String arch){
    def prefixPath = "${env.WORKSPACE}/projects/composablekernel/install;/opt/rocm"
    def compiler = params.BUILD_COMPILER
    // Joined with single spaces; the last fragment contains the one real line
    // break of the command and its trailing blank.
    def fragments = [
        " cmake -G Ninja -DCMAKE_PREFIX_PATH=\"${prefixPath}\"",
        "-DGPU_TARGETS=\"${arch}\"",
        "-DCMAKE_CXX_COMPILER=\"${compiler}\"",
        "-DCMAKE_HIP_COMPILER=\"${compiler}\" .. &&",
        'ninja -j128 tile_example_fmha_fwd tile_example_fmha_bwd &&',
        "cd ../ &&\nexample/ck_tile/01_fmha/script/run_full_test.sh \"CI_${params.COMPILER_VERSION}\" \"${env.BRANCH_NAME}\" \"${NODE_NAME}\" \"${arch}\" "
    ]
    return fragments.join(' ')
}
// Configure, build and test Composable Kernel in projects/composablekernel/build.
//
// Keys read from `conf` (all optional):
//   config_targets - ninja targets for the full build (default "check")
//   build_env      - environment prefix for the ninja command
//   prefixpath     - CMAKE_PREFIX_PATH (default "/opt/rocm")
//   setup_args     - extra cmake arguments; substrings such as "gfx1250",
//                    "NO_CK_BUILD" and "-DCK_CXX_STANDARD=" also steer the flow
//   cmake_ex_env   - extra environment prefix for the cmake command
//   build_install  - "true" adds the install target and an install prefix
//   execute_cmd    - command appended after the build
//   cmd1/cmd2/cmd3, setup_cmd, build_cmd, cmd - full overrides of the
//                    generated shell snippets
//
// Flow: prepare a clean build/install tree (with sccache over redis when
// enabled and reachable), run the generated command, archive build traces
// after a full build, then either run the smart-build script or the full
// `ninja install check`, plus the optional package/builder/rocm-ck steps.
def cmake_build(Map conf=[:]){
def config_targets = conf.get("config_targets","check")
def build_envs = "CTEST_PARALLEL_LEVEL=4 " + conf.get("build_env","")
def prefixpath = conf.get("prefixpath","/opt/rocm")
def setup_args = conf.get("setup_args","")
// make sure all unit tests always run on develop branch
def runAllUnitTests = (env.BRANCH_NAME == "develop") ? true : params.RUN_ALL_UNIT_TESTS
if (prefixpath != "/usr/local"){
setup_args = setup_args + " -DCMAKE_PREFIX_PATH=${prefixpath} "
}
//cmake_env can overwrite default CXX variables.
def cmake_envs
if(!setup_args.contains("gfx1250")){
cmake_envs = "CXX=${params.BUILD_COMPILER} CXXFLAGS='-Werror' " + conf.get("cmake_ex_env","")
}
else{ //use default compiler for gfx1250
cmake_envs = "CXX=/opt/rocm/llvm/bin/clang++ CXXFLAGS='-Werror' " + conf.get("cmake_ex_env","")
}
if(conf.get("build_install","") == "true")
{
config_targets = 'install ' + config_targets
setup_args = ' -DBUILD_DEV=On -DCMAKE_INSTALL_PREFIX=../install' + setup_args
} else{
setup_args = ' -DBUILD_DEV=On' + setup_args
}
if (params.DISABLE_DL_KERNELS){
setup_args = setup_args + " -DDISABLE_DL_KERNELS=ON "
}
setup_args = " -DCMAKE_BUILD_TYPE=release " + setup_args
// Recreates empty build/ and install/ directories and leaves the shell in build/.
// NOTE(review): the string starts with a newline, so "#!/bin/bash" is not the
// first line of the script.
def pre_setup_cmd = """
#!/bin/bash
cd projects/composablekernel
ulimit -c unlimited
rm -rf build
mkdir build
rm -rf install
mkdir install
cd build
"""
// Derive a tag from setup_args, used below as the sccache cache buster.
// The checks are independent ifs, so when several substrings match the last
// matching one in this list wins.
def invocation_tag=""
if (setup_args.contains("gfx12")){
invocation_tag="gfx12"
}
if (setup_args.contains("gfx11")){
invocation_tag="gfx11"
}
if (setup_args.contains("gfx101")){
invocation_tag="gfx101"
}
if (setup_args.contains("gfx103")){
invocation_tag="gfx103"
}
if (setup_args.contains("gfx908")){
invocation_tag="gfx908"
}
if (setup_args.contains("gfx90a")){
invocation_tag="gfx90a"
}
if (setup_args.contains("gfx94")){
invocation_tag="gfx94"
}
if (setup_args.contains("gfx95")){
invocation_tag="gfx95"
}
echo "invocation tag: ${invocation_tag}"
def redis_pre_setup_cmd = pre_setup_cmd
// sccache path: taken only when check_host() is truthy, USE_SCCACHE is set,
// CK_SCCACHE is defined and an invocation tag was derived.
if(check_host() && params.USE_SCCACHE && "${env.CK_SCCACHE}" != "null" && "${invocation_tag}" != "") {
redis_pre_setup_cmd = pre_setup_cmd + """
#!/bin/bash
export ROCM_PATH=/opt/rocm
export SCCACHE_ENABLED=true
export SCCACHE_LOG_LEVEL=debug
export SCCACHE_IDLE_TIMEOUT=14400
export COMPILERS_HASH_DIR=/tmp/.sccache
export SCCACHE_BIN=/usr/local/.cargo/bin/sccache
export SCCACHE_EXTRAFILES=/tmp/.sccache/rocm_compilers_hash_file
export SCCACHE_REDIS="redis://${env.CK_SCCACHE}"
echo "connect = ${env.CK_SCCACHE}" >> ../script/redis-cli.conf
export SCCACHE_C_CUSTOM_CACHE_BUSTER="${invocation_tag}"
echo \$SCCACHE_C_CUSTOM_CACHE_BUSTER
stunnel ../script/redis-cli.conf
../script/sccache_wrapper.sh --enforce_redis
"""
try {
def cmd1 = conf.get("cmd1", """
${redis_pre_setup_cmd}
""")
sh cmd1
// Compiler launchers are added only after the sccache setup script succeeded.
setup_args = " -DCMAKE_HIP_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache " + setup_args
}
catch(Exception err){
// Fall back to the plain setup (which recreates build/ and install/ again).
echo "could not connect to redis server: ${err.getMessage()}. will not use sccache."
def cmd2 = conf.get("cmd2", """
${pre_setup_cmd}
""")
sh cmd2
}
}
else{
def cmd3 = conf.get("cmd3", """
${pre_setup_cmd}
""")
sh cmd3
}
// reduce parallelism when compiling, clang uses too much memory
def nt = nthreads()
def cmd
def setup_cmd
def build_cmd
def execute_cmd = conf.get("execute_cmd", "")
//check the node gpu architecture
def arch_name = check_arch_name()
if(!setup_args.contains("NO_CK_BUILD")){
if (params.NINJA_BUILD_TRACE) {
echo "running ninja build trace"
}
// Same condition as the `ninja check-builder` step further down.
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
setup_args = " -D CK_EXPERIMENTAL_BUILDER=ON " + setup_args
}
if (params.RUN_ROCM_CK_TESTS) {
setup_args = " -D CK_ENABLE_ROCM_CK=ON " + setup_args
}
setup_cmd = conf.get(
"setup_cmd",
"""${cmake_envs} cmake -G Ninja ${setup_args} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_CXX_FLAGS=" -O3 " .. """
)
// Smart-build: Only build if running all tests or forced
// Otherwise, smart-build will determine what to build after cmake configure
if (runAllUnitTests) {
build_cmd = conf.get(
"build_cmd",
"${build_envs} ninja -j${nt} ${config_targets}"
)
} else {
// Smart-build enabled: skip full build and execute_cmd (client examples)
build_cmd = ""
execute_cmd = ""
}
cmd = conf.get("cmd", """
${setup_cmd}
${build_cmd}
${execute_cmd}
""")
}
else{
// NO_CK_BUILD: only the caller-supplied execute_cmd (or cmd override) runs.
cmd = conf.get("cmd", """
${execute_cmd}
""")
}
echo cmd
dir("projects/composablekernel/build"){
// Start sccache monitoring
if(check_host() && params.USE_SCCACHE && "${env.CK_SCCACHE}" != "null" && "${invocation_tag}" != "") {
sh """
chmod +x ../script/monitor_sccache_during_build.sh
mkdir -p logs
export SCCACHE_C_CUSTOM_CACHE_BUSTER="${invocation_tag}"
../script/monitor_sccache_during_build.sh build_monitor &
MONITOR_PID=\$!
echo "Monitor PID: \$MONITOR_PID"
echo \$MONITOR_PID > monitor.pid
"""
}
try {
//build CK
sh cmd
if (runAllUnitTests){
// Archive artifacts if they were generated
if (fileExists("ck_build_trace_${arch_name}.json")) {
archiveArtifacts "ck_build_trace_${arch_name}.json"
}
if (fileExists("clang_build_analysis_${arch_name}.log")) {
archiveArtifacts "clang_build_analysis_${arch_name}.log"
}
// Process ninja build trace after full build
if(fileExists(".ninja_log")) {
sh "python3 ../script/ninja_json_converter.py .ninja_log --legacy-format --output ck_build_trace_${arch_name}.json"
archiveArtifacts "ck_build_trace_${arch_name}.json"
sh "python3 ../script/parse_ninja_trace.py ck_build_trace_${arch_name}.json"
}
if (params.NINJA_FTIME_TRACE) {
echo "running ClangBuildAnalyzer"
sh "/ClangBuildAnalyzer/build/ClangBuildAnalyzer --all . clang_build.log"
sh "/ClangBuildAnalyzer/build/ClangBuildAnalyzer --analyze clang_build.log > clang_build_analysis_${arch_name}.log"
archiveArtifacts "clang_build_analysis_${arch_name}.log"
}
}
} catch (Exception buildError) {
// Logged and re-thrown: the finally block below still stops the monitor.
echo "Build failed: ${buildError.getMessage()}"
throw buildError
} finally {
// Stop sccache monitoring
if(check_host() && params.USE_SCCACHE && "${env.CK_SCCACHE}" != "null" && "${invocation_tag}" != "") {
sh """
# Stop monitoring
if [ -f monitor.pid ]; then
MONITOR_PID=\$(cat monitor.pid)
kill \$MONITOR_PID 2>/dev/null || echo "Monitor already stopped"
rm -f monitor.pid
fi
"""
// Archive the monitoring logs
try {
archiveArtifacts artifacts: "logs/*monitor*.log", allowEmptyArchive: true
} catch (Exception e) {
echo "Could not archive sccache monitoring logs: ${e.getMessage()}"
}
}
}
//run tests except when NO_CK_BUILD is set and except on gfx1250
if(!setup_args.contains("NO_CK_BUILD")){
// run unit tests unless building library for all targets
// Note: This else block is when NINJA_BUILD_TRACE=false and BUILD_INSTANCES_ONLY=false
// So no ninja trace processing needed here
if (!params.BUILD_INSTANCES_ONLY){
if (!runAllUnitTests && !setup_args.contains("gfx1250") ){
// Smart Build: Run smart_build_and_test.sh
sh """
export WORKSPACE_ROOT=${env.WORKSPACE}
export PARALLEL=32
export NINJA_JOBS=${nt}
export ARCH_NAME=${arch_name}
export PROCESS_NINJA_TRACE=false
export NINJA_FTIME_TRACE=false
bash ../script/dependency-parser/smart_build_and_test.sh
"""
}
else{ //run all tests
if(!setup_args.contains("gfx1250")){
echo "Full test suite requested (RUN_ALL_UNIT_TESTS=true or develop branch)"
sh "ninja -j${nt} install check"
}
else{ //do not run tests on gfx1250, just build everything
echo "Building for gfx1250"
sh "ninja -j${nt} install"
}
if (params.RUN_ROCM_CK_TESTS) {
sh 'ninja check-rocm-ck'
}
// NOTE(review): this code sits inside the `!params.BUILD_INSTANCES_ONLY`
// branch, so the BUILD_INSTANCES_ONLY half of the condition below cannot be
// true here; only BUILD_PACKAGES can trigger packaging.
if(params.BUILD_PACKAGES || params.BUILD_INSTANCES_ONLY){
echo "Build ckProfiler packages"
sh 'ninja -j64 package'
sh "mv composablekernel-ckprofiler_*.deb composablekernel-ckprofiler_1.2.0_amd64_${arch_name}.deb"
stash includes: "composablekernel-ckprofiler**.deb", name: "profiler_package_${arch_name}"
}
}
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
sh 'ninja check-builder'
}
}
}
}
// FMHA perf logs are archived and stashed on a best-effort basis: a failure
// is logged and the stashing is skipped.
if (params.RUN_CK_TILE_FMHA_TESTS){
try{
dir("projects/composablekernel"){
archiveArtifacts "perf_fmha_*.log"
stash includes: "perf_fmha_**.log", name: "perf_fmha_log_${arch_name}"
}
}
catch(Exception err){
echo "could not locate the requested artifacts: ${err.getMessage()}. will skip the stashing."
}
}
}
// Runs one CI stage end to end: prints node info, checks out the sources, pulls
// the docker image via pullImage(conf), runs cmake_build(conf) inside that image
// and, for main builds on targets other than gfx1250, the optional inductor,
// hiprtc codegen, performance and hipTensor steps selected by job parameters.
//
// conf keys read directly here (the whole map is also forwarded to pullImage and
// cmake_build):
//   is_main_build - when true, a GPU presence check runs first and the
//                   post-build test steps are enabled (default: false)
//   setup_args    - only inspected for the substring "gfx1250" (default: "")
//
// GitHub status for env.STAGE_NAME is set to 'pending' at the start, 'success'
// when everything passes and 'failure' on any exception other than NodeFault,
// TransientFault or FlowInterruptedException, which are rethrown untouched.
//
// Returns the first element of the pair returned by pullImage(conf).
def buildAndTest(Map conf=[:]){
def isMainBuild = conf.get("is_main_build", false)
show_node_info()
checkoutComposableKernel()
def dockerOpts = get_docker_options()
def image
def retimage
setGithubStatus("${env.STAGE_NAME}", 'pending', "Starting ${env.STAGE_NAME}")
try {
// pullImage returns a pair: the value handed back to the caller and the image
// reference passed to withDockerContainer
(retimage, image) = pullImage(conf)
if (isMainBuild) {
// GPU must be present for the main per-arch build; fail fast if absent
withDockerContainer(image: image, args: dockerOpts) {
timeout(time: 2, unit: 'MINUTES'){
sh 'rocminfo | tee rocminfo.log'
// a falsy runShell result for the grep is treated as "no gfx device listed"
if ( !runShell('grep -n "gfx" rocminfo.log') ){
throw new org.ck.NodeFault("GPU not found")
}
else{
echo "GPU is OK"
}
}
}
}
// The build and all follow-up tests share one container and one 20-hour budget
withDockerContainer(image: image, args: dockerOpts) {
timeout(time: 20, unit: 'HOURS')
{
cmake_build(conf)
// post-build tests are skipped for non-main builds and for gfx1250
if (isMainBuild && !conf.get("setup_args","").contains("gfx1250")) {
//check whether to run performance tests on this node
def arch = check_arch_name()
if ( params.RUN_INDUCTOR_TESTS && arch == "gfx90a" ){
echo "Run inductor codegen tests"
sh "projects/composablekernel/script/run_inductor_tests.sh"
}
if ( params.RUN_CODEGEN_TESTS && arch == "gfx90a" ){
echo "Run hiprtc codegen tests"
sh """ cd projects/composablekernel/codegen && rm -rf build && mkdir build && cd build && \
cmake -DCMAKE_PREFIX_PATH=/opt/rocm -DCMAKE_CXX_COMPILER="${params.BUILD_COMPILER}" .. && \
make -j64 check """
}
// run performance tests, stash the logs, results will be processed on the master node
dir("projects/composablekernel/script"){
if (params.RUN_PERFORMANCE_TESTS){
if (params.RUN_FULL_QA && (arch == "gfx90a" || arch == "gfx942")){
// run full tests on gfx90a or gfx942
echo "Run full performance tests"
sh "./run_full_performance_tests.sh 0 QA_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME} ${arch}"
archiveArtifacts "perf_*.log"
stash includes: "perf_**.log", name: "perf_log_${arch}"
}
else if (!params.RUN_FULL_QA && (arch == "gfx90a" || arch == "gfx942")){
// run standard tests on gfx90a or gfx942
echo "Run performance tests"
sh "./run_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME} ${arch}"
archiveArtifacts "perf_*.log"
stash includes: "perf_**.log", name: "perf_log_${arch}"
}
else if ( arch != "gfx10"){
// run basic tests on gfx11/gfx12/gfx908/gfx950, but not on gfx10, it takes too long
echo "Run gemm performance tests"
sh "./run_gemm_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME} ${arch}"
archiveArtifacts "perf_onnx_gemm_*.log"
stash includes: "perf_onnx_gemm_**.log", name: "perf_log_${arch}"
}
}
}
if (params.hipTensor_test && arch == "gfx90a" ){
// build and test hipTensor on gfx90a node
gitNetRetry("checkout hipTensor") {
sh """#!/bin/bash
git sparse-checkout add projects/hiptensor
git checkout "${params.hipTensor_branch}"
"""
}
// hipTensor is configured against the composablekernel install tree in this workspace
sh """#!/bin/bash
cd projects/hiptensor && mkdir -p build &&
CC=hipcc CXX=hipcc cmake -Bbuild . -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/projects/composablekernel/install" &&
cmake --build build -- -j &&
ctest --test-dir build
"""
}
}
}
}
setGithubStatus("${env.STAGE_NAME}", 'success', "Stage ${env.STAGE_NAME} passed")
}
// Catch order matters: the three specific types below must be matched before
// the generic Exception handler so they bypass the 'failure' status update.
catch (org.ck.NodeFault e) { throw e } // reroute handled by runOnHealthyNode
catch (org.ck.TransientFault e) { throw e } // retry handled by runOnHealthyNode
catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e) { throw e } // abort: no status update
catch (Exception e){
setGithubStatus("${env.STAGE_NAME}", 'failure', "Stage ${env.STAGE_NAME} failed")
throw e
}
return retimage
}
// Deprecated alias for the build-only entry point unified into buildAndTest.
// Kept so develop's Jenkinsfile (and PRs branched from it) still resolve against
// this ck.groovy once it lands on develop. Remove when no open branch calls it.
def buildHipClangJobAndReboot(Map conf=[:]){
    // Pure pass-through: the configuration map is forwarded untouched and
    // whatever buildAndTest returns is handed back to the caller.
    def result = buildAndTest(conf)
    return result
}
// Collects the artifacts stashed by the per-arch stages and post-processes them.
//
// Steps:
//   1. check out the sources and pull the fixed results-processing image;
//   2. inside that image, from projects/composablekernel/script:
//        - RUN_CK_TILE_FMHA_TESTS: unstash the FMHA perf logs (gfx942/gfx90a/gfx950)
//        - BUILD_INSTANCES_ONLY:   unstash "lib_package" and upload the .deb files
//        - BUILD_PACKAGES:         unstash and upload each ckProfiler package,
//          otherwise unstash the per-arch performance logs
//        - run ./process_perf_data.sh
//
// Every unstash is best-effort: a missing stash is logged and skipped.
//
// GitHub status for env.STAGE_NAME goes 'pending' -> 'success', or 'failure' when
// the image cannot be pulled or anything in the processing body throws. The
// original exception is always rethrown.
//
// conf is accepted for signature compatibility; it is not read here.
def process_results(Map conf=[:]){
    checkoutComposableKernel()
    //use older image that has user jenkins
    def image = "${env.CK_DOCKERHUB}:ck_ub22.04_rocm6.3"
    setGithubStatus("${env.STAGE_NAME}", 'pending', 'Processing results...')
    try {
        echo "Pulling image: ${image}"
        def retimage = docker.image("${image}")
        withDockerRegistry([ credentialsId: "ck_docker_cred", url: "" ]) {
            retimage.pull()
        }
    }
    catch (Exception ex) {
        // Report the failure before aborting; otherwise the commit status would
        // stay 'pending' because error() raises an exception of its own.
        setGithubStatus("${env.STAGE_NAME}", 'failure', "Stage ${env.STAGE_NAME} failed")
        error "Unable to locate image: ${image}"
    }
    withDockerContainer(image: image, args: '--cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v=/var/jenkins/:/var/jenkins') {
        timeout(time: 15, unit: 'MINUTES'){
            try{
                dir("projects/composablekernel/script"){
                    if (params.RUN_CK_TILE_FMHA_TESTS){
                        for (arch in ["gfx942", "gfx90a", "gfx950"]) {
                            try{
                                unstash "perf_fmha_log_${arch}"
                            }
                            catch(Exception err){
                                echo "could not locate the FMHA performance logs for ${arch}: ${err.getMessage()}."
                            }
                        }
                    }
                    // NOTE(review): the upload commands below interpolate env.ck_deb_pw
                    // in Groovy, so the password is embedded in the command text.
                    // Consider letting the shell expand it instead.
                    if (params.BUILD_INSTANCES_ONLY){
                        // unstash deb packages
                        try{
                            unstash "lib_package"
                            sh "sshpass -p ${env.ck_deb_pw} scp -o StrictHostKeyChecking=no composablekernel-*.deb ${env.ck_deb_user}@${env.ck_deb_ip}:/var/www/html/composable_kernel/"
                        }
                        catch(Exception err){
                            echo "could not locate lib_package."
                        }
                    }
                    if (params.BUILD_PACKAGES){
                        // unstash deb packages, one stash per architecture
                        for (arch in ["gfx90a", "gfx942", "gfx950", "gfx12"]) {
                            try{
                                unstash "profiler_package_${arch}"
                                sh "sshpass -p ${env.ck_deb_pw} scp -o StrictHostKeyChecking=no composablekernel-ckprofiler*.deb ${env.ck_deb_user}@${env.ck_deb_ip}:/var/www/html/composable_kernel/"
                            }
                            catch(Exception err){
                                echo "could not locate profiler_package_${arch}."
                            }
                        }
                    }
                    else{
                        // unstash perf files to master
                        for (arch in ["gfx90a", "gfx942", "gfx950", "gfx908", "gfx11", "gfx12"]) {
                            try{
                                unstash "perf_log_${arch}"
                            }
                            catch(Exception err){
                                echo "could not locate the ${arch} performance logs: ${err.getMessage()}."
                            }
                        }
                    }
                    // process the logs
                    sh "./process_perf_data.sh"
                }
                setGithubStatus("${env.STAGE_NAME}", 'success', "Stage ${env.STAGE_NAME} passed")
            }
            catch (Exception e){
                // Covers aborts/timeouts (FlowInterruptedException) as before, plus
                // ordinary step failures such as a non-zero exit of the script above.
                setGithubStatus("${env.STAGE_NAME}", 'failure', "Stage ${env.STAGE_NAME} failed")
                throw e
            }
            finally{
                echo "Finished processing performance test results"
            }
        }
    }
}
// Runs a list of shell commands for a downstream project inside a docker image.
//
// conf keys read here:
//   image        - docker image to pull and run in
//   execute_cmds - iterable of shell command strings, executed in order; the
//                  first failing command aborts the stage
//   timeoutHours - time budget for the container body (default: 2)
//
// GitHub status for env.STAGE_NAME goes 'pending' -> 'success'. It is set to
// 'failure' when the image cannot be pulled and to 'error' when any command
// fails; the exception is rethrown in both cases.
def run_downstream_tests(Map conf=[:]){
    show_node_info()
    checkoutComposableKernel()
    def dockerOpts = get_docker_options() + ' --group-add irc '
    setGithubStatus("${env.STAGE_NAME}", 'pending', "Starting ${env.STAGE_NAME}")
    try {
        echo "Pulling image: ${conf.image}"
        // declared locally so parallel stages do not share it through the script binding
        def retimage = docker.image("${conf.image}")
        withDockerRegistry([ credentialsId: "ck_docker_cred", url: "" ]) {
            retimage.pull()
        }
    }
    catch (Exception ex) {
        // Report the failure before aborting; otherwise the commit status would
        // stay 'pending' because error() raises an exception of its own.
        setGithubStatus("${env.STAGE_NAME}", 'failure', "Stage ${env.STAGE_NAME} failed")
        error "Unable to locate image: ${conf.image}"
    }
    withDockerContainer(image: conf.image, args: dockerOpts) {
        timeout(time: conf.get("timeoutHours", 2), unit: 'HOURS'){
            try{
                // log the visible GPUs and the python version before running the tests
                sh "rocminfo"
                sh "python3 --version"
                for (cmd in conf.execute_cmds) {
                    sh "${cmd}"
                }
                setGithubStatus("${env.STAGE_NAME}", 'success', "Stage ${env.STAGE_NAME} passed")
            }
            catch(e){
                echo "Throwing error exception while running ${env.STAGE_NAME}"
                echo 'Exception occurred: ' + e.toString()
                setGithubStatus("${env.STAGE_NAME}", 'error', "Stage ${env.STAGE_NAME} failed")
                throw e
            }
            finally{
                echo "Finished running ${env.STAGE_NAME}"
            }
        }
    }
}
// Returns the ordered shell commands for the PyTorch downstream stage: copy the
// tree from /var/jenkins/workspace/pytorch into ./pytorch, run build_amd.py,
// then build with setup.py develop (USE_ROCM_CK_SDPA=1, gfx942).
def getPytorchTestsCmds() {
    def cmds = []
    // stage a private copy of the pytorch tree
    cmds << "mkdir pytorch"
    cmds << "cp -r /var/jenkins/workspace/pytorch/* pytorch/"
    cmds << "ls -ltr pytorch"
    // build steps
    cmds << "python3 pytorch/tools/amd_build/build_amd.py"
    cmds << "cd pytorch && USE_ROCM_CK_SDPA=1 PYTORCH_ROCM_ARCH=gfx942 python3 setup.py develop"
    return cmds
}
// Returns the ordered shell commands for the AITER downstream stage: one
// preparation command followed by the op_tests scripts.
def getAiterTestsCmds() {
    def cmds = []
    // Pre-compile FlyDSL MoE AOT cache before the tests.
    cmds.add("cd /home/jenkins/workspace/aiter && python3 aiter/aot/flydsl/moe.py")
    // GEMM tests
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_gemm_a8w8.py")
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_gemm_a8w8_blockscale.py")
    // attention tests
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_mha.py")
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_mha_varlen.py")
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_batch_prefill.py")
    // MoE tests
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_moe.py")
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_moe_2stage.py")
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_moe_blockscale.py")
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_moe_ep.py")
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_moe_sorting.py")
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_moe_sorting_mxfp4.py")
    cmds.add("python3 /home/jenkins/workspace/aiter/op_tests/test_moe_tkw1.py")
    return cmds
}
// Returns the shell commands for the flash-attention downstream stage: a single
// unbuffered pytest run of test_flash_attn_ck.py.
def getFaTestsCmds() {
    def pytestCmd = "python3 -u -m pytest /home/jenkins/workspace/flash-attention/tests/test_flash_attn_ck.py"
    return [pytestCmd]
}
// All static checks in one container on a single node: clang-format (always),
// cppcheck (when RUN_CPPCHECK), then the ASCII-only and CRLF checks. Combined
// into a single buildAndTest, driven by one Jenkinsfile stage, to keep the
// declarative pipeline's WorkflowScript under the JVM 64KB method-size limit and
// to avoid per-check checkout/container overhead.
//
// Every check runs from projects/composablekernel (cmake_build runs execute_cmd
// from .../build, so the single leading `cd ..` lands there); no check changes
// directory, so chaining them with && is equivalent to the previous separate
// invocations. Checks run sequentially and fail fast on the first failure.
def runStaticChecks() {
    // File set for clang-format; the text checks below use the same set plus *.inc.
    def formatFinder = "find . -type f \\( -name '*.h' -o -name '*.hpp' -o -name '*.cpp' -o -name '*.h.in' -o -name '*.hpp.in' -o -name '*.cpp.in' -o -name '*.cl' \\) -not -path '*/build/*' -not -path '*/include/rapidjson/*'"
    def textFinder = "find . -type f \\( -name '*.h' -o -name '*.hpp' -o -name '*.cpp' -o -name '*.h.in' -o -name '*.hpp.in' -o -name '*.cpp.in' -o -name '*.inc' -o -name '*.cl' \\) -not -path '*/build/*' -not -path '*/include/rapidjson/*'"
    def cppcheckEnabled = params.RUN_CPPCHECK

    // Commands are collected in execution order and later chained with &&.
    def stepList = []
    // 1. formatting: any file whose clang-format output differs is reported
    stepList.add("""${formatFinder} | xargs -P 8 -I{} sh -c 'clang-format-18 -style=file {} | diff -u - {} || (echo "ERROR: {} needs formatting" && exit 1)'""")
    // 2. optional cppcheck run, writing ck_cppcheck.log
    if (cppcheckEnabled) {
        stepList.add("""/cppcheck/build/bin/cppcheck ../* -v -j \$(nproc) -I ../include -I ../profiler/include -I ../library/include \
-D CK_ENABLE_FP64 -D CK_ENABLE_FP32 -D CK_ENABLE_FP16 -D CK_ENABLE_FP8 -D CK_ENABLE_BF16 -D CK_ENABLE_BF8 -D CK_ENABLE_INT8 \
-D __gfx908__ -D __gfx90a__ -D __gfx942__ -D __gfx1030__ -D __gfx1100__ -D __gfx1101__ -D __gfx1102__ \
-U __gfx803__ -U __gfx900__ -U __gfx906__ -U CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 \
--file-filter=*.cpp --force --enable=all --output-file=ck_cppcheck.log""")
    }
    // 3. ASCII-only and 4. CRLF checks, 64 files per script invocation
    stepList.add("""${textFinder} -print0 | xargs -0 -P 8 -n 64 script/check_ascii_only.sh""")
    stepList.add("""${textFinder} -print0 | xargs -0 -P 8 -n 64 script/check_no_crlf.sh""")

    def chainedChecks = "cd .. && " + stepList.join(" && ")
    buildAndTest(
        setup_args: "NO_CK_BUILD",
        setup_cmd: "",
        build_cmd: "",
        execute_cmd: chainedChecks
    )
    if (cppcheckEnabled) {
        archiveArtifacts "build/ck_cppcheck.log"
    }
}
// Configures with the dev-gfx90a preset (CK Tile dispatcher on, "tests" rule
// set), builds the three grouped-conv tile test binaries and runs them in the
// order bwd_weight, bwd_data, fwd. Uses buildAndTest with NO_CK_BUILD.
def runFullGroupedConvTileTests() {
    def tileTestCmd = """
cmake .. --preset dev-gfx90a -D CK_TILE_DISPATCHER=ON -DDISPATCHER_RULE_SET=tests && \
make -j64 test_grouped_convnd_fwd_tile test_grouped_convnd_bwd_weight_tile test_grouped_convnd_bwd_data_tile && \
./bin/test_grouped_convnd_bwd_weight_tile && \
./bin/test_grouped_convnd_bwd_data_tile && \
./bin/test_grouped_convnd_fwd_tile"""
    def stageConf = [
        setup_args: "NO_CK_BUILD",
        build_type: 'Release',
        execute_cmd: tileTestCmd
    ]
    buildAndTest(stageConf)
}
// Configures with the dev-gfx90a preset, builds the grouped-conv large-case
// test binaries (fwd, bwd_data, fwd_bias_clamp) and runs them in that order.
// Uses buildAndTest with NO_CK_BUILD.
def runGroupedConvLargeCaseTests() {
    def largeCaseCmd = """
cmake .. --preset dev-gfx90a && \
make -j64 test_grouped_convnd_fwd_large_cases test_grouped_convnd_bwd_data_large_cases test_grouped_convnd_fwd_bias_clamp_large_cases && \
./bin/test_grouped_convnd_fwd_large_cases && \
./bin/test_grouped_convnd_bwd_data_large_cases && \
./bin/test_grouped_convnd_fwd_bias_clamp_large_cases"""
    def stageConf = [
        setup_args: "NO_CK_BUILD",
        build_type: 'Release',
        execute_cmd: largeCaseCmd
    ]
    buildAndTest(stageConf)
}
// Builds the dataset-driven grouped-conv XDL tests with the dev-gfx90a preset,
// generates the "small" dataset from ../test_data, then runs the fwd, bwd_data
// and bwd_weight test binaries. Uses buildAndTest with NO_CK_BUILD.
def runComprehensiveConvDatasetTests() {
    def datasetCmd = """
cd ../build && \
cmake .. --preset dev-gfx90a && \
make -j64 test_grouped_convnd_fwd_dataset_xdl \
test_grouped_convnd_bwd_data_dataset_xdl \
test_grouped_convnd_bwd_weight_dataset_xdl && \
cd ../test_data && \
./generate_test_dataset.sh small && \
cd ../build && \
./bin/test_grouped_convnd_fwd_dataset_xdl && \
./bin/test_grouped_convnd_bwd_data_dataset_xdl && \
./bin/test_grouped_convnd_bwd_weight_dataset_xdl"""
    def stageConf = [
        setup_args: "NO_CK_BUILD",
        build_type: 'Release',
        execute_cmd: datasetCmd
    ]
    buildAndTest(stageConf)
}
// Configures a gfx942 Release build with BUILD_CK_TILE_ENGINE on and the CI
// config files, builds the universal / preshuffle / multi_d GEMM benchmark
// targets, then runs each benchmark script on a 1024x1024x1024 problem.
//
// compiler - value passed to CMAKE_CXX_COMPILER
def runTileEngineBasicTests(String compiler) {
    def basicBenchCmd = """
cmake -G Ninja -D CMAKE_PREFIX_PATH=/opt/rocm \
-D BUILD_CK_TILE_ENGINE="ON" \
-D CMAKE_CXX_COMPILER="${compiler}" \
-D CMAKE_BUILD_TYPE=Release \
-D GPU_TARGETS="gfx942" \
-D GEMM_UNIVERSAL_DATATYPE="fp8;fp16" \
-D GEMM_UNIVERSAL_LAYOUT="rcr;rrr;crr;ccr" \
-D GEMM_UNIVERSAL_CONFIG_FILE="default_ci_config.json" \
-D GEMM_MULTI_D_DATATYPE="fp16" \
-D GEMM_MULTI_D_LAYOUT="rcrr;rrrr;crrr;ccrr" \
-D GEMM_MULTI_D_CONFIG_FILE="default_ci_config.json" \
-D GEMM_PRESHUFFLE_DATATYPE="fp16;fp8;bf16;bf8" \
-D GEMM_PRESHUFFLE_LAYOUT="rcr" \
-D GEMM_PRESHUFFLE_CONFIG_FILE="default_ci_config.json" .. && \
ninja -j${nthreads()} benchmark_gemm_universal_all benchmark_gemm_preshuffle_all benchmark_gemm_multi_d_all && \
python3 ../tile_engine/ops/gemm/gemm_universal/gemm_universal_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_universal_results.json && \
python3 ../tile_engine/ops/gemm/gemm_preshuffle/gemm_preshuffle_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_preshuffle_results.json && \
python3 ../tile_engine/ops/gemm/gemm_multi_d/gemm_multi_d_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_multi_d_results.json"""
    def stageConf = [
        setup_args: "NO_CK_BUILD",
        build_type: 'Release',
        execute_cmd: basicBenchCmd
    ]
    buildAndTest(stageConf)
}
// Builds and runs the tile-engine GEMM benchmarks for one architecture using
// the "daily" sampling tier.
//
// arch     - "gfx942", "gfx950" or "gfx1201"; for any other value no command is
//            selected and buildAndTest receives execute_cmd: null
// compiler - value passed to CMAKE_CXX_COMPILER
def runTileEngineGemmTests(String arch, String compiler) {
    def benchCmd = null
    switch (arch) {
        case "gfx942":
            // full set of GEMM benchmark families
            benchCmd = """
cmake -G Ninja -D CMAKE_PREFIX_PATH=/opt/rocm \
-D BUILD_CK_TILE_ENGINE="ON" \
-D CMAKE_CXX_COMPILER="${compiler}" \
-D CMAKE_BUILD_TYPE=Release \
-D GPU_TARGETS="gfx942" \
-D GEMM_UNIVERSAL_DATATYPE="fp8;fp16;bf8;bf16" \
-D GEMM_UNIVERSAL_LAYOUT="rcr;rrr;crr;ccr" \
-D GEMM_STREAMK_DATATYPE="fp8;fp16" \
-D GEMM_STREAMK_LAYOUT="rcr" \
-D GEMM_MULTI_D_DATATYPE="fp16" \
-D GEMM_MULTI_D_LAYOUT="rcrr;rrrr;crrr;ccrr" \
-D GEMM_PRESHUFFLE_DATATYPE="fp16;fp8;bf16;bf8" \
-D GEMM_PRESHUFFLE_LAYOUT="rcr" \
-D GROUPED_GEMM_DATATYPE="fp8;fp16" \
-D GROUPED_GEMM_LAYOUT="rcr;rrr;crr;ccr" \
-D GEMM_MULTI_ABD_DATATYPE="fp16" \
-D GEMM_MULTI_ABD_LAYOUT="rcrr" \
-D BATCHED_CONTRACTION_DATATYPE="fp16" \
-D BATCHED_CONTRACTION_LAYOUT="rcr" \
-D GEMM_ROWCOLQUANT_DATATYPE="fp8;bf8" \
-D GEMM_ROWCOLQUANT_LAYOUT="rcr" \
-D GEMM_TENSOR_QUANT_DATATYPE="fp8;bf8" \
-D GEMM_TENSOR_QUANT_LAYOUT="rcr" \
-D GROUPED_GEMM_ROWCOLQUANT_DATATYPE="fp8;bf8" \
-D GROUPED_GEMM_ROWCOLQUANT_LAYOUT="rcr" \
-D GROUPED_GEMM_TENSORQUANT_DATATYPE="fp8;bf8" \
-D GROUPED_GEMM_TENSORQUANT_LAYOUT="rcr" \
-D BATCHED_GEMM_DATATYPE="fp16" \
-D BATCHED_GEMM_LAYOUT="rcr" \
-D TILE_ENGINE_SAMPLING_TIER=daily .. && \
ninja -j${nthreads()} benchmark_gemm_universal_all benchmark_gemm_preshuffle_all benchmark_gemm_multi_d_all benchmark_gemm_streamk_all benchmark_grouped_gemm_all benchmark_gemm_multi_abd_all benchmark_batched_contraction_all benchmark_gemm_rowcolquant_all benchmark_gemm_tensor_quant_all benchmark_grouped_gemm_rowcolquant_all benchmark_grouped_gemm_tensorquant_all benchmark_batched_gemm_all && \
python3 ../tile_engine/ops/gemm/gemm_universal/gemm_universal_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_universal_results.json && \
python3 ../tile_engine/ops/gemm/gemm_preshuffle/gemm_preshuffle_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_preshuffle_results.json && \
python3 ../tile_engine/ops/gemm/gemm_multi_d/gemm_multi_d_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_multi_d_results.json && \
python3 ../tile_engine/ops/gemm/grouped_gemm/grouped_gemm_benchmark.py . --problem-sizes "1024,1024,1024" --group-counts 8 --warmup 5 --repeat 5 --verbose --json grouped_gemm_results.json && \
python3 ../tile_engine/ops/gemm/gemm_multi_abd/gemm_multi_abd_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_multi_abd_results.json && \
python3 ../tile_engine/ops/gemm/batched_contraction/batched_contraction_benchmark.py . --problem-configs "g=2;m=1024;n=1024;k=1024" --warmup 5 --repeat 5 --verbose --json batched_contraction_results.json && \
python3 ../tile_engine/ops/gemm/block_scale_gemm/gemm_rowcolquant/gemm_rowcolquant_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_rowcolquant_results.json && \
python3 ../tile_engine/ops/gemm/block_scale_gemm/gemm_tensor_quant/gemm_tensor_quant_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_tensor_quant_results.json && \
python3 ../tile_engine/ops/gemm/grouped_gemm_quant/grouped_gemm_rowcolquant/grouped_gemm_rowcolquant_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json grouped_gemm_rowcolquant_results.json && \
python3 ../tile_engine/ops/gemm/grouped_gemm_quant/grouped_gemm_tensorquant/grouped_gemm_tensorquant_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json grouped_gemm_tensorquant_results.json && \
python3 ../tile_engine/ops/gemm/batched_gemm/batched_gemm_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json batched_gemm_results.json """
            break
        case "gfx950":
            // NOTE(review): MX_GEMM_* is configured and mx_gemm_benchmark.py is run,
            // but no mx_gemm target appears in the ninja target list below; confirm
            // the mx_gemm benchmark binaries actually get built.
            benchCmd = """
cmake -G Ninja -D CMAKE_PREFIX_PATH=/opt/rocm \
-D BUILD_CK_TILE_ENGINE="ON" \
-D CMAKE_CXX_COMPILER="${compiler}" \
-D CMAKE_BUILD_TYPE=Release \
-D GPU_TARGETS="gfx950" \
-D GEMM_UNIVERSAL_DATATYPE="fp8;fp16" \
-D GEMM_UNIVERSAL_LAYOUT="rcr;rrr;crr;ccr" \
-D GEMM_MULTI_D_DATATYPE="fp16" \
-D GEMM_MULTI_D_LAYOUT="rcrr;rrrr;crrr;ccrr" \
-D GEMM_PRESHUFFLE_DATATYPE="fp16;fp8;bf16;bf8" \
-D GEMM_PRESHUFFLE_LAYOUT="rcr" \
-D MX_GEMM_DATATYPE="fp4;fp8" \
-D MX_GEMM_LAYOUT="rcr" \
-D TILE_ENGINE_SAMPLING_TIER=daily .. && \
ninja -j${nthreads()} benchmark_gemm_universal_all benchmark_gemm_preshuffle_all benchmark_gemm_multi_d_all && \
python3 ../tile_engine/ops/gemm/gemm_universal/gemm_universal_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_universal_results.json && \
python3 ../tile_engine/ops/gemm/gemm_preshuffle/gemm_preshuffle_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_preshuffle_results.json && \
python3 ../tile_engine/ops/gemm/gemm_multi_d/gemm_multi_d_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_multi_d_results.json && \
python3 ../tile_engine/ops/gemm/mx_gemm/mx_gemm_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json mx_gemm_results.json """
            break
        case "gfx1201":
            // universal GEMM only, fp16
            benchCmd = """
cmake -G Ninja -D CMAKE_PREFIX_PATH=/opt/rocm \
-D BUILD_CK_TILE_ENGINE="ON" \
-D CMAKE_CXX_COMPILER="${compiler}" \
-D CMAKE_BUILD_TYPE=Release \
-D GPU_TARGETS="gfx1201" \
-D GEMM_UNIVERSAL_DATATYPE="fp16" \
-D GEMM_UNIVERSAL_LAYOUT="rcr;rrr;crr;ccr" \
-D TILE_ENGINE_SAMPLING_TIER=daily .. && \
ninja -j${nthreads()} benchmark_gemm_universal_all && \
python3 ../tile_engine/ops/gemm/gemm_universal/gemm_universal_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json gemm_universal_results.json"""
            break
    }
    buildAndTest(setup_args: "NO_CK_BUILD", build_type: 'Release', execute_cmd: benchCmd)
}
// Main per-architecture build: assembles the buildAndTest configuration for
// `arch` (install prefix ../install, Release, is_main_build: true) and runs it.
//
//   gfx90a  - adds -DCK_CXX_STANDARD="17" and builds the client examples
//   gfx1250 - adds -DDISABLE_DL_KERNELS="ON", uses the private gfx1250 docker
//             image and sets no execute_cmd
//   other   - arch is used as the GPU target and the client examples are built
def runBuildCKAndTests(String arch) {
    def target
    def archSetupFlags = ""
    def postBuildCmd = ""
    def overrides = [:]
    if (arch == "gfx90a") {
        target = "gfx90a"
        archSetupFlags = " -DCK_CXX_STANDARD=\"17\""
        postBuildCmd = build_client_examples(target)
    }
    else if (arch == "gfx1250") {
        target = "gfx1250"
        archSetupFlags = " -DDISABLE_DL_KERNELS=\"ON\""
        overrides = [docker_name: "${env.CK_DOCKERHUB_PRIVATE}:ck_ub24.04_gfx1250"]
    }
    else {
        // generic targets (gfx10-1/gfx10-3/gfx11/gfx12-generic) and every other
        // architecture are handled the same way
        target = arch
        postBuildCmd = build_client_examples(target)
    }
    def cmakeArgs = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="${target}"${archSetupFlags} """
    def stageConf = [
        setup_args: cmakeArgs,
        config_targets: "install",
        build_type: 'Release',
        prefixpath: '/usr/local',
        is_main_build: true
    ]
    // only pass execute_cmd when there is something to run
    if (postBuildCmd) {
        stageConf['execute_cmd'] = postBuildCmd
    }
    // per-arch overrides win over the defaults above
    stageConf.putAll(overrides)
    buildAndTest(stageConf)
}
// Configures a Release Ninja build for the fixed GPU_ARCHS list in the command
// below and runs `ninja -j64`, through buildAndTest with NO_CK_BUILD and an
// empty build_cmd.
//
// compiler - value passed to both CMAKE_CXX_COMPILER and CMAKE_HIP_COMPILER
def runBuildInstancesOnly(String compiler) {
    def instancesCmd = """
cmake -G Ninja -D CMAKE_PREFIX_PATH=/opt/rocm \
-DCMAKE_CXX_COMPILER="${compiler}" \
-DCMAKE_HIP_COMPILER="${compiler}" \
-DGPU_ARCHS="gfx908;gfx90a;gfx942;gfx950;gfx10-3-generic;gfx11-generic;gfx12-generic" \
-D CMAKE_BUILD_TYPE=Release .. && ninja -j64"""
    def stageConf = [
        setup_args: "NO_CK_BUILD",
        build_cmd: "",
        build_type: 'Release',
        execute_cmd: instancesCmd
    ]
    buildAndTest(stageConf)
}