mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
Refactor CI performance tests. (#1726)
* merge the build and performance tests CI stages together * add gemm performance test on gfx11/gfx12 * add suffixes to distinguish gemm performance logs from different archs * use smaller gemm set in CI for gfx10/gfx11/gfx12 * disable performance tests on gfx1030 * fix the stashing logic * fix finding python3 for mha instances
This commit is contained in:
286
Jenkinsfile
vendored
286
Jenkinsfile
vendored
@@ -330,10 +330,8 @@ def cmake_build(Map conf=[:]){
|
||||
try{
|
||||
archiveArtifacts "perf_fmha_fwd_*.log"
|
||||
archiveArtifacts "perf_fmha_bwd_*.log"
|
||||
stash name: "perf_fmha_fwd_gfx942.log"
|
||||
stash name: "perf_fmha_bwd_gfx942.log"
|
||||
stash name: "perf_fmha_fwd_gfx90a.log"
|
||||
stash name: "perf_fmha_bwd_gfx90a.log"
|
||||
stash includes: "perf_fmha_**_gfx942.log", name: "perf_fmha_log_gfx942"
|
||||
stash includes: "perf_fmha_**_gfx90a.log", name: "perf_fmha_log_gfx90a"
|
||||
}
|
||||
catch(Exception err){
|
||||
echo "could not locate the requested artifacts: ${err.getMessage()}. will skip the stashing."
|
||||
@@ -408,128 +406,6 @@ def buildHipClangJobAndReboot(Map conf=[:]){
|
||||
}
|
||||
}
|
||||
|
||||
def runCKProfiler(Map conf=[:]){
|
||||
show_node_info()
|
||||
|
||||
env.HSA_ENABLE_SDMA=0
|
||||
checkout scm
|
||||
|
||||
def image = getDockerImageName()
|
||||
def prefixpath = conf.get("prefixpath", "/opt/rocm")
|
||||
|
||||
// Jenkins is complaining about the render group
|
||||
def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
|
||||
if (conf.get("enforce_xnack_on", false)) {
|
||||
dockerOpts = dockerOpts + " --env HSA_XNACK=1 "
|
||||
}
|
||||
def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3')
|
||||
def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3')
|
||||
dockerOpts = dockerOpts + " --group-add=${video_id} --group-add=${render_id} "
|
||||
echo "Docker flags: ${dockerOpts}"
|
||||
|
||||
def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
|
||||
|
||||
def variant = env.STAGE_NAME
|
||||
def retimage
|
||||
|
||||
gitStatusWrapper(credentialsId: "${env.ck_git_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') {
|
||||
try {
|
||||
(retimage, image) = getDockerImage(conf)
|
||||
withDockerContainer(image: image, args: dockerOpts) {
|
||||
timeout(time: 5, unit: 'MINUTES'){
|
||||
sh 'rocminfo | tee rocminfo.log'
|
||||
if ( !runShell('grep -n "gfx" rocminfo.log') ){
|
||||
throw new Exception ("GPU not found")
|
||||
}
|
||||
else{
|
||||
echo "GPU is OK"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){
|
||||
echo "The job was cancelled or aborted"
|
||||
throw e
|
||||
}
|
||||
|
||||
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
|
||||
timeout(time: 24, unit: 'HOURS')
|
||||
{
|
||||
sh """
|
||||
rm -rf build
|
||||
mkdir build
|
||||
"""
|
||||
dir("build"){
|
||||
unstash 'ckProfiler.tar.gz'
|
||||
sh 'tar -xvf ckProfiler.tar.gz'
|
||||
}
|
||||
|
||||
dir("script"){
|
||||
if (params.RUN_FULL_QA){
|
||||
sh "./run_full_performance_tests.sh 0 QA_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
|
||||
archiveArtifacts "perf_gemm.log"
|
||||
archiveArtifacts "perf_resnet50_N256.log"
|
||||
archiveArtifacts "perf_resnet50_N4.log"
|
||||
archiveArtifacts "perf_batched_gemm.log"
|
||||
archiveArtifacts "perf_grouped_gemm.log"
|
||||
archiveArtifacts "perf_grouped_conv_fwd.log"
|
||||
archiveArtifacts "perf_grouped_conv_bwd_data.log"
|
||||
archiveArtifacts "perf_grouped_conv_bwd_weight.log"
|
||||
archiveArtifacts "perf_gemm_bilinear.log"
|
||||
archiveArtifacts "perf_reduction.log"
|
||||
archiveArtifacts "perf_splitK_gemm.log"
|
||||
archiveArtifacts "perf_onnx_gemm.log"
|
||||
archiveArtifacts "perf_mixed_gemm.log"
|
||||
// stash perf files to master
|
||||
stash name: "perf_gemm.log"
|
||||
stash name: "perf_resnet50_N256.log"
|
||||
stash name: "perf_resnet50_N4.log"
|
||||
stash name: "perf_batched_gemm.log"
|
||||
stash name: "perf_grouped_gemm.log"
|
||||
stash name: "perf_grouped_conv_fwd.log"
|
||||
stash name: "perf_grouped_conv_bwd_data.log"
|
||||
stash name: "perf_grouped_conv_bwd_weight.log"
|
||||
stash name: "perf_gemm_bilinear.log"
|
||||
stash name: "perf_reduction.log"
|
||||
stash name: "perf_splitK_gemm.log"
|
||||
stash name: "perf_onnx_gemm.log"
|
||||
stash name: "perf_mixed_gemm.log"
|
||||
//we will process results on the master node
|
||||
}
|
||||
else{
|
||||
sh "./run_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
|
||||
archiveArtifacts "perf_gemm.log"
|
||||
archiveArtifacts "perf_resnet50_N256.log"
|
||||
archiveArtifacts "perf_resnet50_N4.log"
|
||||
// stash perf files to master
|
||||
stash name: "perf_gemm.log"
|
||||
stash name: "perf_resnet50_N256.log"
|
||||
stash name: "perf_resnet50_N4.log"
|
||||
//we will process the results on the master node
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return retimage
|
||||
}
|
||||
|
||||
def runPerfTest(Map conf=[:]){
|
||||
try{
|
||||
runCKProfiler(conf)
|
||||
}
|
||||
catch(e){
|
||||
echo "throwing error exception in performance tests"
|
||||
echo 'Exception occurred: ' + e.toString()
|
||||
throw e
|
||||
}
|
||||
finally{
|
||||
if (!conf.get("no_reboot", false)) {
|
||||
reboot()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def Build_CK(Map conf=[:]){
|
||||
show_node_info()
|
||||
|
||||
@@ -589,36 +465,95 @@ def Build_CK(Map conf=[:]){
|
||||
throw e
|
||||
}
|
||||
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
|
||||
timeout(time: 24, unit: 'HOURS')
|
||||
timeout(time: 12, unit: 'HOURS')
|
||||
{
|
||||
//check whether to run performance tests on this node
|
||||
def do_perf_tests = 0
|
||||
def arch_type = 0
|
||||
sh 'rocminfo | tee rocminfo.log'
|
||||
if ( runShell('grep -n "gfx1030" rocminfo.log') || runShell('grep -n "gfx1101" rocminfo.log') || runShell('grep -n "gfx1201" rocminfo.log') || runShell('grep -n "gfx942" rocminfo.log') ){
|
||||
do_perf_tests = 1
|
||||
echo "Stash profiler and run performance tests"
|
||||
if ( runShell('grep -n "gfx90a" rocminfo.log') ){
|
||||
arch_type = 1
|
||||
}
|
||||
else if ( runShell('grep -n "gfx942" rocminfo.log') ) {
|
||||
arch_type = 2
|
||||
}
|
||||
else if ( runShell('grep -n "gfx1030" rocminfo.log') ) {
|
||||
arch_type = 3
|
||||
}
|
||||
else if ( runShell('grep -n "gfx1101" rocminfo.log') ) {
|
||||
arch_type = 4
|
||||
}
|
||||
else if ( runShell('grep -n "gfx1201" rocminfo.log') ) {
|
||||
arch_type = 5
|
||||
}
|
||||
cmake_build(conf)
|
||||
dir("build"){
|
||||
//run tests and examples
|
||||
//sh 'make -j check'
|
||||
if (params.RUN_PERFORMANCE_TESTS && do_perf_tests == 0 ){
|
||||
//we only need the ckProfiler to run the performance tests, so we pack and stash it
|
||||
//do not stash profiler on nodes where we don't need to run performance tests
|
||||
sh 'tar -zcvf ckProfiler.tar.gz bin/ckProfiler'
|
||||
stash name: "ckProfiler.tar.gz"
|
||||
}
|
||||
if (params.RUN_FULL_QA && do_perf_tests == 0 ){
|
||||
// build deb packages for all gfx9 targets and prepare to export
|
||||
if (params.RUN_FULL_QA && arch_type == 1 ){
|
||||
// build deb packages for all gfx9 targets on gfx90a system and prepare to export
|
||||
echo "Build ckProfiler package"
|
||||
sh 'make -j package'
|
||||
archiveArtifacts artifacts: 'composablekernel-ckprofiler_*.deb'
|
||||
archiveArtifacts artifacts: 'composablekernel-tests_*.deb'
|
||||
sh 'mv composablekernel-ckprofiler_*.deb ckprofiler_0.2.0_amd64.deb'
|
||||
stash name: "ckprofiler_0.2.0_amd64.deb"
|
||||
stash includes: "ckprofiler_0.2.0_amd64.deb", name: "ckprofiler_0.2.0_amd64.deb"
|
||||
}
|
||||
}
|
||||
if (params.hipTensor_test && do_perf_tests == 0 ){
|
||||
//build and test hipTensor
|
||||
// run performance tests, stash the logs, results will be processed on the master node
|
||||
dir("script"){
|
||||
if (params.RUN_PERFORMANCE_TESTS){
|
||||
if (params.RUN_FULL_QA && arch_type == 1){
|
||||
// run full tests on gfx90a
|
||||
echo "Run full performance tests"
|
||||
sh "./run_full_performance_tests.sh 0 QA_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
|
||||
archiveArtifacts "perf_gemm.log"
|
||||
archiveArtifacts "perf_resnet50_N256.log"
|
||||
archiveArtifacts "perf_resnet50_N4.log"
|
||||
archiveArtifacts "perf_batched_gemm.log"
|
||||
archiveArtifacts "perf_grouped_gemm.log"
|
||||
archiveArtifacts "perf_grouped_conv_fwd.log"
|
||||
archiveArtifacts "perf_grouped_conv_bwd_data.log"
|
||||
archiveArtifacts "perf_grouped_conv_bwd_weight.log"
|
||||
archiveArtifacts "perf_gemm_bilinear.log"
|
||||
archiveArtifacts "perf_reduction.log"
|
||||
archiveArtifacts "perf_splitK_gemm.log"
|
||||
archiveArtifacts "perf_onnx_gemm.log"
|
||||
archiveArtifacts "perf_mixed_gemm.log"
|
||||
stash includes: "perf_**.log", name: "perf_log"
|
||||
}
|
||||
else if ( arch_type == 1 ){
|
||||
// run standard tests on gfx90a
|
||||
echo "Run performance tests"
|
||||
sh "./run_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
|
||||
archiveArtifacts "perf_gemm.log"
|
||||
archiveArtifacts "perf_onnx_gemm.log"
|
||||
archiveArtifacts "perf_resnet50_N256.log"
|
||||
archiveArtifacts "perf_resnet50_N4.log"
|
||||
stash includes: "perf_**.log", name: "perf_log"
|
||||
}
|
||||
// disable performance tests on gfx1030 for now.
|
||||
//else if ( arch_type == 3){
|
||||
// run basic tests on gfx1030
|
||||
// echo "Run gemm performance tests"
|
||||
// sh "./run_gemm_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME} gfx10"
|
||||
// archiveArtifacts "perf_onnx_gemm_gfx10.log"
|
||||
// stash includes: "perf_onnx_gemm_gfx10.log", name: "perf_log_gfx10"
|
||||
//}
|
||||
else if ( arch_type == 4){
|
||||
// run basic tests on gfx11
|
||||
echo "Run gemm performance tests"
|
||||
sh "./run_gemm_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME} gfx11"
|
||||
archiveArtifacts "perf_onnx_gemm_gfx11.log"
|
||||
stash includes: "perf_onnx_gemm_gfx11.log", name: "perf_log_gfx11"
|
||||
}
|
||||
else if ( arch_type == 5 ){
|
||||
// run basic tests on gfx12
|
||||
echo "Run gemm performance tests"
|
||||
sh "./run_gemm_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME} gfx12"
|
||||
archiveArtifacts "perf_onnx_gemm_gfx12.log"
|
||||
stash includes: "perf_onnx_gemm_gfx12.log", name: "perf_log_gfx12"
|
||||
}
|
||||
}
|
||||
}
|
||||
if (params.hipTensor_test && arch_type == 1 ){
|
||||
// build and test hipTensor on gfx90a node
|
||||
sh """#!/bin/bash
|
||||
rm -rf "${params.hipTensor_branch}".zip
|
||||
rm -rf hipTensor-"${params.hipTensor_branch}"
|
||||
@@ -690,10 +625,8 @@ def process_results(Map conf=[:]){
|
||||
dir("script"){
|
||||
if (params.RUN_CK_TILE_FMHA_TESTS){
|
||||
try{
|
||||
unstash "perf_fmha_fwd_gfx942.log"
|
||||
unstash "perf_fmha_bwd_gfx942.log"
|
||||
unstash "perf_fmha_fwd_gfx90a.log"
|
||||
unstash "perf_fmha_bwd_gfx90a.log"
|
||||
unstash "perf_fmha_log_gfx942"
|
||||
unstash "perf_fmha_log_gfx90a"
|
||||
}
|
||||
catch(Exception err){
|
||||
echo "could not locate the FMHA performance logs: ${err.getMessage()}."
|
||||
@@ -703,26 +636,26 @@ def process_results(Map conf=[:]){
|
||||
// unstash perf files to master
|
||||
unstash "ckprofiler_0.2.0_amd64.deb"
|
||||
sh "sshpass -p ${env.ck_deb_pw} scp -o StrictHostKeyChecking=no ckprofiler_0.2.0_amd64.deb ${env.ck_deb_user}@${env.ck_deb_ip}:/var/www/html/composable_kernel/"
|
||||
unstash "perf_gemm.log"
|
||||
unstash "perf_resnet50_N256.log"
|
||||
unstash "perf_resnet50_N4.log"
|
||||
unstash "perf_batched_gemm.log"
|
||||
unstash "perf_grouped_gemm.log"
|
||||
unstash "perf_grouped_conv_fwd.log"
|
||||
unstash "perf_grouped_conv_bwd_data.log"
|
||||
unstash "perf_grouped_conv_bwd_weight.log"
|
||||
unstash "perf_gemm_bilinear.log"
|
||||
unstash "perf_reduction.log"
|
||||
unstash "perf_splitK_gemm.log"
|
||||
unstash "perf_onnx_gemm.log"
|
||||
unstash "perf_mixed_gemm.log"
|
||||
unstash "perf_log"
|
||||
try{
|
||||
unstash "perf_log_gfx11"
|
||||
unstash "perf_log_gfx12"
|
||||
}
|
||||
catch(Exception err){
|
||||
echo "could not locate the GEMM gfx11/gfx12 performance logs: ${err.getMessage()}."
|
||||
}
|
||||
sh "./process_qa_data.sh"
|
||||
}
|
||||
else{
|
||||
// unstash perf files to master
|
||||
unstash "perf_gemm.log"
|
||||
unstash "perf_resnet50_N256.log"
|
||||
unstash "perf_resnet50_N4.log"
|
||||
unstash "perf_log"
|
||||
try{
|
||||
unstash "perf_log_gfx11"
|
||||
unstash "perf_log_gfx12"
|
||||
}
|
||||
catch(Exception err){
|
||||
echo "could not locate the GEMM gfx11/gfx12 performance logs: ${err.getMessage()}."
|
||||
}
|
||||
sh "./process_perf_data.sh"
|
||||
}
|
||||
}
|
||||
@@ -1241,29 +1174,6 @@ pipeline {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage("Performance Tests")
|
||||
{
|
||||
parallel
|
||||
{
|
||||
stage("Run ckProfiler: gfx90a")
|
||||
{
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { params.RUN_PERFORMANCE_TESTS.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
|
||||
}
|
||||
options { retry(1) }
|
||||
agent{ label rocmnode("gfx90a")}
|
||||
environment{
|
||||
setup_args = "NO_CK_BUILD"
|
||||
}
|
||||
steps{
|
||||
runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release')
|
||||
cleanWs()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Process Performance Test Results")
|
||||
{
|
||||
parallel
|
||||
|
||||
@@ -6,7 +6,7 @@ set(CK_TILE_SRC_FOLDER ${CMAKE_SOURCE_DIR}/include/ck_tile/)
|
||||
# CK Codegen requires dataclass which is added in Python 3.7
|
||||
# Python version 3.8 is required for general good practice as it is default for Ubuntu 20.04
|
||||
if(NOT CK_USE_ALTERNATIVE_PYTHON)
|
||||
find_package(PythonInterp 3 REQUIRED)
|
||||
find_package(Python3 COMPONENTS Interpreter Development)
|
||||
else()
|
||||
message("Using alternative python version")
|
||||
set(EXTRA_PYTHON_PATH)
|
||||
@@ -33,7 +33,7 @@ set(FMHA_KNOWN_APIS "fwd,fwd_splitkv,fwd_appendkv,bwd")
|
||||
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
|
||||
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
|
||||
execute_process(
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
|
||||
COMMAND ${Python3_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
|
||||
--list_blobs ${FMHA_CPP_FOLDER}/blob_list.txt
|
||||
--api ${FMHA_KNOWN_APIS}
|
||||
--receipt 3
|
||||
@@ -50,7 +50,7 @@ endif()
|
||||
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
|
||||
add_custom_command(
|
||||
OUTPUT ${FMHA_GEN_BLOBS}
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
|
||||
COMMAND ${Python3_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
|
||||
--output_dir ${FMHA_CPP_FOLDER}
|
||||
--api ${FMHA_KNOWN_APIS}
|
||||
--receipt 3
|
||||
|
||||
@@ -82,7 +82,7 @@ def parse_logfile(logfile):
|
||||
StrideA=[]
|
||||
StrideB=[]
|
||||
StrideC=[]
|
||||
if 'perf_gemm.log' in logfile:
|
||||
if 'perf_gemm' in logfile and 'gemm_bilinear' not in logfile:
|
||||
for line in open(logfile):
|
||||
if 'Best Perf' in line:
|
||||
lst=line.split()
|
||||
@@ -260,7 +260,7 @@ def main():
|
||||
conn = sqlEngine.connect()
|
||||
|
||||
#save gemm performance tests:
|
||||
if 'perf_gemm.log' in filename:
|
||||
if 'perf_gemm' in filename and 'gemm_bilinear' not in filename:
|
||||
#write the ck_gemm_test_params table only needed once the test set changes
|
||||
#post_test_params(test_list,conn)
|
||||
for i in range(1,len(results)+1):
|
||||
|
||||
@@ -11,9 +11,22 @@
|
||||
|
||||
#process results
|
||||
python3 process_perf_data.py perf_gemm.log
|
||||
python3 process_perf_data.py perf_onnx_gemm.log
|
||||
python3 process_perf_data.py perf_resnet50_N256.log
|
||||
python3 process_perf_data.py perf_resnet50_N4.log
|
||||
|
||||
file=./perf_onnx_gemm_gfx10.log
|
||||
if [ -e "$file" ]; then
|
||||
python3 process_perf_data.py perf_onnx_gemm_gfx10.log
|
||||
fi
|
||||
file=./perf_onnx_gemm_gfx11.log
|
||||
if [ -e "$file" ]; then
|
||||
python3 process_perf_data.py perf_onnx_gemm_gfx11.log
|
||||
fi
|
||||
file=./perf_onnx_gemm_gfx12.log
|
||||
if [ -e "$file" ]; then
|
||||
python3 process_perf_data.py perf_onnx_gemm_gfx12.log
|
||||
fi
|
||||
file=./perf_fmha_fwd_gfx942.log
|
||||
if [ -e "$file" ]; then
|
||||
python3 process_perf_data.py perf_fmha_fwd_gfx942.log
|
||||
|
||||
@@ -24,6 +24,18 @@ python3 process_perf_data.py perf_splitK_gemm.log
|
||||
python3 process_perf_data.py perf_onnx_gemm.log
|
||||
python3 process_perf_data.py perf_mixed_gemm.log
|
||||
|
||||
file=./perf_onnx_gemm_gfx10.log
|
||||
if [ -e "$file" ]; then
|
||||
python3 process_perf_data.py perf_onnx_gemm_gfx10.log
|
||||
fi
|
||||
file=./perf_onnx_gemm_gfx11.log
|
||||
if [ -e "$file" ]; then
|
||||
python3 process_perf_data.py perf_onnx_gemm_gfx11.log
|
||||
fi
|
||||
file=./perf_onnx_gemm_gfx12.log
|
||||
if [ -e "$file" ]; then
|
||||
python3 process_perf_data.py perf_onnx_gemm_gfx12.log
|
||||
fi
|
||||
file=./perf_fmha_fwd_gfx942.log
|
||||
if [ -e "$file" ]; then
|
||||
python3 process_perf_data.py perf_fmha_fwd_gfx942.log
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# post your new test results to the database and compare them to the baseline
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
#
|
||||
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <branch name> < node name>
|
||||
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>
|
||||
# input arguments:
|
||||
# verification = 0 : do not verify result correctness on CPU
|
||||
# = 1 : verify correctness on CPU (may take a long time)
|
||||
|
||||
41
script/run_gemm_performance_tests.sh
Executable file
41
script/run_gemm_performance_tests.sh
Executable file
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
|
||||
# run the script as "./run_gemm_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name> <arch>"
|
||||
# input arguments:
|
||||
# verification = 0 : do not verify result correctness on CPU
|
||||
# = 1 : verify correctness on CPU (may take a long time)
|
||||
# environment tag : a string describing the specifics of your test environment
|
||||
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
|
||||
# node name : $hostname
|
||||
# arch : GPU architecture tag, e.g. "gfx11" or "gfx12"; it is used as the suffix of the log file name (perf_onnx_gemm_<arch>.log)
|
||||
|
||||
#get the command line arguments:
|
||||
export verify=$1
|
||||
echo 'Verification: ' $verify
|
||||
export env_type=$2
|
||||
echo 'Environment type: ' $env_type
|
||||
export branch=$3
|
||||
echo 'Branch name: ' $branch
|
||||
export host_name=$4
|
||||
echo 'Host name: ' $host_name
|
||||
export arch=$5
|
||||
echo 'GPU architecture: ' $arch
|
||||
|
||||
# Start a fresh log file and write a header describing the test environment.
# Arguments:
#   $1 - path of the log file to (re)create
#   $2 - environment tag, written as "Environment type:"
#   $3 - branch name, written as "On branch"
#   $4 - node name, written as "Node name:"
function print_log_header(){
|
||||
# remove any log left over from a previous run (-f: no error if it does not exist)
rm -f $1;
|
||||
# first write creates the file (both stdout and stderr are redirected); all later writes append
echo 'On branch ' $3 &> $1;
|
||||
echo 'Node name: ' $4 >> $1;
|
||||
#get GPU_arch and number of compute units from rocminfo
|
||||
# -n keeps the "GPU_arch: " label on the same line as the first matching rocminfo "Name: ... gfx..." entry
echo -n "GPU_arch: " >> $1; rocminfo | grep "Name:" | grep "gfx" >> $1;
|
||||
rocminfo | grep "Compute Unit:" >> $1;
|
||||
# record the HIP version line reported by hipcc
hipcc --version | grep -e 'HIP version' >> $1;
|
||||
echo 'Environment type: ' $2 >> $1;
|
||||
# record the "InstalledDir" line reported by the amdclang++ under /opt/rocm
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> $1;
|
||||
}
|
||||
|
||||
#run ONNX gemm tests
|
||||
export onnx_log="perf_onnx_gemm_$arch.log"
|
||||
print_log_header $onnx_log $env_type $branch $host_name
|
||||
./profile_onnx_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
|
||||
./profile_onnx_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
|
||||
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <branch name> < node name>
|
||||
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>
|
||||
# input arguments:
|
||||
# verification = 0 : do not verify result correctness on CPU
|
||||
# = 1 : verify correctness on CPU (may take a long time)
|
||||
@@ -51,20 +51,11 @@ print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 2 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
|
||||
#run grouped_fwd fp16 tests
|
||||
export grouped_conv_fwd_log="perf_grouped_conv_fwd_fp16.log"
|
||||
print_log_header $conv_fwd_log $env_type $branch $host_name
|
||||
./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
|
||||
|
||||
#run grouped_bwd_data fp16 tests
|
||||
export grouped_conv_bwd_data_log="perf_grouped_conv_bwd_data_fp16.log"
|
||||
print_log_header $grouped_conv_bwd_data_log $env_type $branch $host_name
|
||||
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
|
||||
|
||||
#run grouped_bwd_weight fp16 tests
|
||||
export grouped_conv_bwd_weight_log="perf_grouped_conv_bwd_weight_fp16.log"
|
||||
print_log_header $grouped_conv_bwd_weight_log $env_type $branch $host_name
|
||||
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 1 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
|
||||
#run ONNX gemm tests
|
||||
export onnx_log="perf_onnx_gemm.log"
|
||||
print_log_header $onnx_log $env_type $branch $host_name
|
||||
./profile_onnx_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
|
||||
./profile_onnx_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
|
||||
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256.log"
|
||||
|
||||
Reference in New Issue
Block a user