Add a ckProfiler performance-test stage to CI.

Jenkinsfile: adds runCKProfiler() and runPerfTest(), comments out the
standalone 'Build Profiler: Release, gfx908' stage, and adds a
"Performance Tests" stage whose agent label is rocmnode("gfx908").
script/parse_perf_data.py: new script that collects the 'Best Perf' lines
from the profiler logs and prints them.
script/profile_gemm.sh: uses the prebuilt ../build/bin/ckProfiler instead
of building it, and adds eight more GEMM problem sizes.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch. Context-line indentation is a
best guess, so apply with `git apply --ignore-whitespace` if it does not
match. The post-image blob ids on the `index` lines predate the review
edits made to Jenkinsfile and script/parse_perf_data.py.

diff --git a/Jenkinsfile b/Jenkinsfile
index 824437c970..f065d4ecc5 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -140,6 +140,10 @@ def reboot(){
     build job: 'reboot-slaves', propagate: false , parameters: [string(name: 'server', value: "${env.NODE_NAME}"),]
 }
 
+
+
+
+
 def buildHipClangJobAndReboot(Map conf=[:]){
     try{
         buildHipClangJob(conf)
@@ -156,6 +160,94 @@ def buildHipClangJobAndReboot(Map conf=[:]){
         }
     }
 }
+
+def runCKProfiler(Map conf=[:]){
+    show_node_info()
+
+    env.HSA_ENABLE_SDMA=0
+    checkout scm
+
+    def image = "composable_kernels"
+    def prefixpath = conf.get("prefixpath", "/opt/rocm")
+    def gpu_arch = conf.get("gpu_arch", "gfx908")
+
+    // Jenkins is complaining about the render group
+    // def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
+    def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
+    if (conf.get("enforce_xnack_on", false)) {
+        dockerOpts = dockerOpts + " --env HSA_XNACK=1"
+    }
+    def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg GPU_ARCH='${gpu_arch}' "
+
+    def variant = env.STAGE_NAME
+
+
+    def retimage
+    gitStatusWrapper(credentialsId: '7126e5fe-eb51-4576-b52b-9aaf1de8f0fd', gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') {
+        try {
+            retimage = docker.build("${image}", dockerArgs + '.')
+            withDockerContainer(image: image, args: dockerOpts) {
+                timeout(time: 5, unit: 'MINUTES')
+                {
+                    sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo'
+                }
+            }
+        }
+        catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){
+            echo "The job was cancelled or aborted"
+            throw e
+        }
+        catch(Exception ex) {
+            retimage = docker.build("${image}", dockerArgs + "--no-cache .")
+            withDockerContainer(image: image, args: dockerOpts) {
+                timeout(time: 5, unit: 'MINUTES')
+                {
+                    sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo'
+                }
+            }
+        }
+
+        withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
+            timeout(time: 5, unit: 'HOURS')
+            {
+                cmake_build(conf)
+                dir("script"){
+                    def perf_log = "perf_gemm_${gpu_arch}.log"
+                    def artifact = "profile_gemm_${gpu_arch}.txt"
+                    sh "./profile_gemm.sh gemm 0 0 0 1 0 5 | tee ${perf_log} ||true"
+                    sh "./profile_gemm.sh gemm 0 1 0 1 0 5 | tee -a ${perf_log} ||true"
+                    sh "./profile_gemm.sh gemm 0 2 0 1 0 5 | tee -a ${perf_log} ||true"
+                    sh "./profile_gemm.sh gemm 0 3 0 1 0 5 | tee -a ${perf_log} || true"
+                    //results will be parsed, stored, and analyzed within the python script
+                    //the script will return 0 if the performance criteria are met
+                    //or return 1 if the criteria are not met
+                    //pipefail keeps tee from masking the exit status of the python script
+                    sh "bash -o pipefail -c 'python3 parse_perf_data.py ${perf_log} | tee ${artifact}'"
+                }
+            }
+        }
+    }
+    return retimage
+}
+
+
+def runPerfTest(Map conf=[:]){
+    try{
+        runCKProfiler(conf)
+    }
+    catch(e){
+        echo "throwing error exception in performance tests"
+        echo 'Exception occurred: ' + e.toString()
+        throw e
+    }
+    finally{
+        if (!conf.get("no_reboot", false)) {
+            reboot()
+        }
+    }
+}
+
+
 pipeline {
     agent none
     options {
@@ -178,18 +270,19 @@ pipeline {
                 //         buildHipClangJobAndReboot(build_cmd: build_cmd, no_reboot:true, prefixpath: '/opt/rocm', build_type: 'debug')
                 //     }
                 // }
-                stage('Build Profiler: Release, gfx908')
-                {
-                    agent { label rocmnode("nogpu")}
-                    environment{
-                        setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " -DBUILD_DEV=On """
-                    }
-                    steps{
-                        buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release')
-                    }
-                }
+                // we will build and run ckProfiler release version later, during the performance test stage
+                //stage('Build Profiler: Release, gfx908')
+                //{
+                //    agent { label rocmnode("nogpu")}
+                //    environment{
+                //        setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " -DBUILD_DEV=On """
+                //    }
+                //    steps{
+                //        buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release')
+                //    }
+                //}
                 stage('Build Profiler: Debug, gfx908')
-                {
+                {
                     agent { label rocmnode("nogpu")}
                     environment{
                         setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " -DBUILD_DEV=On """
@@ -249,6 +342,24 @@ pipeline {
             }
         }
 
+        stage("Performance Tests")
+        {
+            parallel
+            {
+                stage("Run ckProfiler: gfx908")
+                {
+                    agent{ label rocmnode("gfx908")}
+                    environment{
+                        setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " -DBUILD_DEV=On """
+                    }
+                    steps{
+                        runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release')
+                    }
+
+                }
+
+            }
+        }
         // enable after the cmake file supports packaging
         // stage("Packages") {
         //     when {
diff --git a/script/parse_perf_data.py b/script/parse_perf_data.py
new file mode 100644
index 0000000000..3e41f8c4cf
--- /dev/null
+++ b/script/parse_perf_data.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+"""Extract 'Best Perf' result lines from ckProfiler log files."""
+import argparse
+import io
+import os
+import sys
+
+
+def print_to_string(*args, **kwargs):
+    """Return the text print(*args, **kwargs) would emit, as a string."""
+    output = io.StringIO()
+    print(*args, file=output, **kwargs)
+    contents = output.getvalue()
+    output.close()
+    return contents
+
+
+def parse_args():
+    """Parse the command line and attach the list of log files as args.files.
+
+    A directory argument expands to every entry whose name contains 'log';
+    anything else is taken as the path of a single log file.
+    """
+    parser = argparse.ArgumentParser(description='Parse results from tf benchmark runs')
+    parser.add_argument('filename', type=str, help='Log file to parse or directory containing log files')
+    args = parser.parse_args()
+    if os.path.isdir(args.filename):
+        args.files = [os.path.join(args.filename, name)
+                      for name in os.listdir(args.filename)
+                      if 'log' in name]
+    else:
+        args.files = [args.filename]
+    return args
+
+
+def main():
+    """Print the 'Best Perf' entries found in the logs; return the exit status."""
+    args = parse_args()
+    results = []
+    #parse results
+    glue = ""
+    for filename in args.files:
+        # 'with' closes each log instead of leaking the file handle
+        with open(filename) as log:
+            for line in log:
+                if 'Best Perf' not in line:
+                    continue
+                lst = line.split()
+                # skip truncated lines that lack the field at index 4
+                if len(lst) < 5:
+                    continue
+                # NOTE(review): presumably lst[4] is the throughput figure and
+                # lst[8:] the kernel name -- confirm against ckProfiler output
+                results.append(print_to_string(glue.join(lst[8:]), lst[4]))
+
+    #sort results
+
+    #read baseline results for the latest develop branch
+
+    #write new results to the db
+
+    #compare the results to the baseline
+
+    #return 0 if performance criteria met, otherwise return 1
+
+    print(results)
+    return 0
+
+
+if __name__ == '__main__':
+    # hand main()'s status to the shell so CI can act on it
+    sys.exit(main())
diff --git a/script/profile_gemm.sh b/script/profile_gemm.sh
index 036d0440e0..b816c5101f 100755
--- a/script/profile_gemm.sh
+++ b/script/profile_gemm.sh
@@ -1,12 +1,10 @@
 #!/bin/bash
 
 ## GPU visibility
- export HIP_VISIBLE_DEVICES=0
-
- make -j ckProfiler
-
- DRIVER="./profiler/ckProfiler"
-
+export HIP_VISIBLE_DEVICES=0
+#make -j ckProfiler
+DRIVER="../build/bin/ckProfiler"
+echo $DRIVER
 OP=$1
 DATATYPE=$2
 LAYOUT=$3
@@ -43,3 +41,13 @@ $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1
 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112
 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160
 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256
+
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 6656 8192 8192 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3328 4096 4096 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1664 2048 2048 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 832 1024 1024 -1 -1 -1
+
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7040 8192 8192 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 5120 5632 4096 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2560 2816 2048 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1280 1408 1024 -1 -1 -1