mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
[CI] Add CI build and test stage on MI300. (#1185)
[ROCm/composable_kernel commit: cf86621170]
This commit is contained in:
92
Jenkinsfile
vendored
92
Jenkinsfile
vendored
@@ -1,5 +1,5 @@
|
||||
def rocmnode(name) {
|
||||
return '(rocmtest || miopen) && ' + name
|
||||
return '(rocmtest || miopen) && (' + name + ')'
|
||||
}
|
||||
|
||||
def show_node_info() {
|
||||
@@ -7,6 +7,7 @@ def show_node_info() {
|
||||
echo "NODE_NAME = \$NODE_NAME"
|
||||
lsb_release -sd
|
||||
uname -r
|
||||
cat /sys/module/amdgpu/version
|
||||
ls /opt/ -la
|
||||
"""
|
||||
}
|
||||
@@ -33,6 +34,10 @@ def runShell(String command){
|
||||
|
||||
def getDockerImageName(){
|
||||
def img
|
||||
if (params.USE_CUSTOM_DOCKER != ""){
|
||||
img = "${params.USE_CUSTOM_DOCKER}"
|
||||
}
|
||||
else{
|
||||
if (params.ROCMVERSION != "6.0.1"){
|
||||
if (params.COMPILER_VERSION == "") {
|
||||
img = "${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}"
|
||||
@@ -61,6 +66,7 @@ def getDockerImageName(){
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return img
|
||||
}
|
||||
|
||||
@@ -365,8 +371,8 @@ def runCKProfiler(Map conf=[:]){
|
||||
(retimage, image) = getDockerImage(conf)
|
||||
withDockerContainer(image: image, args: dockerOpts) {
|
||||
timeout(time: 5, unit: 'MINUTES'){
|
||||
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
|
||||
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
|
||||
sh 'rocminfo | tee rocminfo.log'
|
||||
if ( !runShell('grep -n "gfx" rocminfo.log') ){
|
||||
throw new Exception ("GPU not found")
|
||||
}
|
||||
else{
|
||||
@@ -379,20 +385,6 @@ def runCKProfiler(Map conf=[:]){
|
||||
echo "The job was cancelled or aborted"
|
||||
throw e
|
||||
}
|
||||
catch(Exception ex) {
|
||||
retimage = docker.build("${image}", dockerArgs + " --no-cache .")
|
||||
withDockerContainer(image: image, args: dockerOpts) {
|
||||
timeout(time: 5, unit: 'MINUTES'){
|
||||
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
|
||||
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
|
||||
throw new Exception ("GPU not found")
|
||||
}
|
||||
else{
|
||||
echo "GPU is OK"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
|
||||
timeout(time: 24, unit: 'HOURS')
|
||||
@@ -473,6 +465,7 @@ def Build_CK(Map conf=[:]){
|
||||
show_node_info()
|
||||
|
||||
env.HSA_ENABLE_SDMA=0
|
||||
env.DOCKER_BUILDKIT=1
|
||||
checkout scm
|
||||
|
||||
def image = getDockerImageName()
|
||||
@@ -487,25 +480,35 @@ def Build_CK(Map conf=[:]){
|
||||
if (params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){
|
||||
dockerOpts = dockerOpts + " --env HIP_CLANG_PATH='/llvm-project/build/bin' "
|
||||
}
|
||||
def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3')
|
||||
def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3')
|
||||
dockerOpts = dockerOpts + " --group-add=${video_id} --group-add=${render_id} "
|
||||
echo "Docker flags: ${dockerOpts}"
|
||||
|
||||
def variant = env.STAGE_NAME
|
||||
def retimage
|
||||
def navi_node = 0
|
||||
def mi300_node = 0
|
||||
|
||||
gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') {
|
||||
gitStatusWrapper(credentialsId: "${env.status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') {
|
||||
try {
|
||||
(retimage, image) = getDockerImage(conf)
|
||||
withDockerContainer(image: image, args: dockerOpts) {
|
||||
timeout(time: 5, unit: 'MINUTES'){
|
||||
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
|
||||
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
|
||||
sh 'rocminfo | tee rocminfo.log'
|
||||
if ( !runShell('grep -n "gfx" rocminfo.log') ){
|
||||
throw new Exception ("GPU not found")
|
||||
}
|
||||
else{
|
||||
echo "GPU is OK"
|
||||
}
|
||||
if ( runShell('grep -n "gfx1030" clinfo.log') || runShell('grep -n "gfx1101" clinfo.log') ){
|
||||
if ( runShell('grep -n "gfx1030" rocminfo.log') || runShell('grep -n "gfx1101" rocminfo.log') ){
|
||||
navi_node = 1
|
||||
echo "This is a Navi node"
|
||||
}
|
||||
if ( runShell('grep -n "gfx942" rocminfo.log') ){
|
||||
mi300_node = 1
|
||||
echo "This is MI300 node"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -514,23 +517,6 @@ def Build_CK(Map conf=[:]){
|
||||
echo "The job was cancelled or aborted"
|
||||
throw e
|
||||
}
|
||||
catch(Exception ex) {
|
||||
retimage = docker.build("${image}", dockerArgs + " --no-cache .")
|
||||
withDockerContainer(image: image, args: dockerOpts) {
|
||||
timeout(time: 5, unit: 'MINUTES'){
|
||||
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo |tee clinfo.log'
|
||||
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
|
||||
throw new Exception ("GPU not found")
|
||||
}
|
||||
else{
|
||||
echo "GPU is OK"
|
||||
}
|
||||
if ( runShell('grep -n "gfx1030" clinfo.log') || runShell('grep -n "gfx1101" clinfo.log') ){
|
||||
navi_node = 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
|
||||
timeout(time: 24, unit: 'HOURS')
|
||||
{
|
||||
@@ -544,8 +530,8 @@ def Build_CK(Map conf=[:]){
|
||||
sh 'tar -zcvf ckProfiler.tar.gz bin/ckProfiler'
|
||||
stash "ckProfiler.tar.gz"
|
||||
}
|
||||
if (params.RUN_FULL_QA){
|
||||
// build deb packages
|
||||
if (params.RUN_FULL_QA && mi300_node == 0 ){
|
||||
// build deb packages for all MI100/200/300 targets and prepare to export
|
||||
sh 'make -j package'
|
||||
archiveArtifacts artifacts: 'composablekernel-ckprofiler_*.deb'
|
||||
archiveArtifacts artifacts: 'composablekernel-tests_*.deb'
|
||||
@@ -610,7 +596,7 @@ def process_results(Map conf=[:]){
|
||||
def variant = env.STAGE_NAME
|
||||
def retimage
|
||||
|
||||
gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') {
|
||||
gitStatusWrapper(credentialsId: "${env.status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') {
|
||||
try {
|
||||
(retimage, image) = getDockerImage(conf)
|
||||
}
|
||||
@@ -678,6 +664,10 @@ pipeline {
|
||||
name: "BUILD_DOCKER",
|
||||
defaultValue: false,
|
||||
description: "Force building docker image (default: false), set to true if docker image needs to be updated.")
|
||||
string(
|
||||
name: 'USE_CUSTOM_DOCKER',
|
||||
defaultValue: '',
|
||||
description: 'If you want to use a custom docker image, please scecify it here (default: OFF).')
|
||||
string(
|
||||
name: 'ROCMVERSION',
|
||||
defaultValue: '6.0',
|
||||
@@ -828,6 +818,26 @@ pipeline {
|
||||
cleanWs()
|
||||
}
|
||||
}
|
||||
stage("Build CK and run Tests on MI300")
|
||||
{
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { params.RUN_FULL_QA.toBoolean() }
|
||||
}
|
||||
agent{ label rocmnode("gfx942") }
|
||||
environment{
|
||||
setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx942" -DCMAKE_CXX_FLAGS=" -O3 " """
|
||||
execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && \
|
||||
cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \
|
||||
-DGPU_TARGETS="gfx942" \
|
||||
-DCMAKE_CXX_COMPILER="${build_compiler()}" \
|
||||
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j """
|
||||
}
|
||||
steps{
|
||||
Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
|
||||
cleanWs()
|
||||
}
|
||||
}
|
||||
stage("Build CK and run Tests on MI100/MI200")
|
||||
{
|
||||
when {
|
||||
|
||||
Reference in New Issue
Block a user