mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
Build the CK targets only once. (#433)
* build CK only once, use deb package in all subsequent stages * update jenkins file * change prefix for build_CK stage * update writing deb metadata to control file * update ubuntu source for docker, script syntax for deb package metadata * try different way to create deb metadata * clean up DEBIAN before creating one * fix the CI folder names, fix splitK qa * use correct docker in all stages, separate tests for splitK verification and performance * clean old comments, change dir before packaging * use different package syntax * change packaging syntax * package with cmake * remove unnecessary build prefix * get rid of unnecessary paths * change paths during unpacking * change script syntax while unpacking * get rid of unnecessary steps * get rid of comments in the scripts * use double quotes for scripts * add ccache during build, try dpkg -x * pull and install each package separately * use full package names * try to use stashing for packages * change stash/unstash syntax * move unstash out of shell, run tests on any gpu node * unpack each package separately * try re-using existing workspace * merge the build and test stages, only stash ckProfiler * merge the build and test stages, only stash zipped ckProfiler * fix syntax * add GPU check before build and test, rename docker to usual name
This commit is contained in:
@@ -12,7 +12,8 @@ RUN apt-get install -y wget gnupg
|
||||
RUN wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
|
||||
RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO ubuntu main > /etc/apt/sources.list.d/rocm.list"
|
||||
RUN wget --no-check-certificate -qO - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
|
||||
RUN sh -c "echo deb https://apt.kitware.com/ubuntu/ bionic main | tee -a /etc/apt/sources.list"
|
||||
#RUN sh -c "echo deb https://apt.kitware.com/ubuntu/ bionic main | tee -a /etc/apt/sources.list"
|
||||
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
|
||||
|
||||
# Install dependencies
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
|
||||
@@ -68,7 +69,6 @@ ENV UBSAN_OPTIONS=print_stacktrace=1
|
||||
|
||||
ENV LC_ALL=C.UTF-8
|
||||
ENV LANG=C.UTF-8
|
||||
ADD dev-requirements.txt dev-requirements.txt
|
||||
RUN groupadd -f render
|
||||
|
||||
# Install the new rocm-cmake version
|
||||
|
||||
288
Jenkinsfile
vendored
288
Jenkinsfile
vendored
@@ -42,7 +42,6 @@ def build_compiler(){
|
||||
def getDockerImage(Map conf=[:]){
|
||||
env.DOCKER_BUILDKIT=1
|
||||
def prefixpath = conf.get("prefixpath", "/opt/rocm") // prefix:/opt/rocm
|
||||
def gpu_arch = conf.get("gpu_arch", "gfx908") // prebuilt dockers should have all the architectures enabled so one image can be used for all stages
|
||||
def no_cache = conf.get("no_cache", false)
|
||||
def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' "
|
||||
if(env.CCACHE_HOST)
|
||||
@@ -154,6 +153,10 @@ def cmake_build(Map conf=[:]){
|
||||
}else{
|
||||
setup_args = " -DCMAKE_BUILD_TYPE=release" + setup_args
|
||||
}
|
||||
if(env.CCACHE_HOST)
|
||||
{
|
||||
setup_args = " -DCMAKE_CXX_COMPILER_LAUNCHER='ccache' -DCMAKE_C_COMPILER_LAUNCHER='ccache' " + setup_args
|
||||
}
|
||||
|
||||
def pre_setup_cmd = """
|
||||
echo \$HSA_ENABLE_SDMA
|
||||
@@ -191,15 +194,13 @@ def buildHipClangJob(Map conf=[:]){
|
||||
env.HSA_ENABLE_SDMA=0
|
||||
checkout scm
|
||||
|
||||
def image = "composable_kernels_${params.COMPILER_VERSION}"
|
||||
def image = getDockerImageName()
|
||||
def prefixpath = conf.get("prefixpath", "/opt/rocm")
|
||||
def gpu_arch = conf.get("gpu_arch", "gfx908")
|
||||
|
||||
// Jenkins is complaining about the render group
|
||||
// def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
|
||||
def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
|
||||
def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
|
||||
if (conf.get("enforce_xnack_on", false)) {
|
||||
dockerOpts = dockerOpts + " --env HSA_XNACK=1 --env GPU_ARCH='${gpu_arch}' "
|
||||
dockerOpts = dockerOpts + " --env HSA_XNACK=1 "
|
||||
}
|
||||
def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' "
|
||||
if (params.COMPILER_VERSION != "release"){
|
||||
@@ -281,16 +282,13 @@ def runCKProfiler(Map conf=[:]){
|
||||
env.HSA_ENABLE_SDMA=0
|
||||
checkout scm
|
||||
|
||||
|
||||
def image = "composable_kernels_${params.COMPILER_VERSION}"
|
||||
def image = getDockerImageName()
|
||||
def prefixpath = conf.get("prefixpath", "/opt/rocm")
|
||||
def gpu_arch = conf.get("gpu_arch", "gfx908")
|
||||
|
||||
// Jenkins is complaining about the render group
|
||||
// def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
|
||||
def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
|
||||
def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
|
||||
if (conf.get("enforce_xnack_on", false)) {
|
||||
dockerOpts = dockerOpts + " --env HSA_XNACK=1 --env GPU_ARCH='${gpu_arch}' "
|
||||
dockerOpts = dockerOpts + " --env HSA_XNACK=1 "
|
||||
}
|
||||
def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' "
|
||||
if (params.COMPILER_VERSION != "release"){
|
||||
@@ -302,7 +300,6 @@ def runCKProfiler(Map conf=[:]){
|
||||
|
||||
gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') {
|
||||
try {
|
||||
//retimage = docker.build("${image}", dockerArgs + '.')
|
||||
(retimage, image) = getDockerImage(conf)
|
||||
withDockerContainer(image: image, args: dockerOpts) {
|
||||
timeout(time: 5, unit: 'MINUTES'){
|
||||
@@ -338,48 +335,57 @@ def runCKProfiler(Map conf=[:]){
|
||||
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
|
||||
timeout(time: 24, unit: 'HOURS')
|
||||
{
|
||||
cmake_build(conf)
|
||||
//cmake_build(conf)
|
||||
//instead of building, just unstash the ckProfiler and install it
|
||||
sh """
|
||||
rm -rf build
|
||||
mkdir build
|
||||
"""
|
||||
dir("build"){
|
||||
unstash 'ckProfiler.tar.gz'
|
||||
sh 'tar -xvf ckProfiler.tar.gz'
|
||||
}
|
||||
|
||||
dir("script"){
|
||||
if (params.RUN_FULL_QA){
|
||||
def qa_log = "qa_${gpu_arch}.log"
|
||||
sh "./run_full_performance_tests.sh 1 QA_${params.COMPILER_VERSION} ${gpu_arch} ${env.BRANCH_NAME} ${NODE_NAME}"
|
||||
archiveArtifacts "perf_gemm_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_resnet50_N256_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_resnet50_N4_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_batched_gemm_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_grouped_gemm_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_conv_fwd_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_conv_bwd_data_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_gemm_bilinear_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_reduction_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_splitK_gemm_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_onnx_gemm_${gpu_arch}.log"
|
||||
sh "./run_full_performance_tests.sh 1 QA_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
|
||||
archiveArtifacts "perf_gemm.log"
|
||||
archiveArtifacts "perf_resnet50_N256.log"
|
||||
archiveArtifacts "perf_resnet50_N4.log"
|
||||
archiveArtifacts "perf_batched_gemm.log"
|
||||
archiveArtifacts "perf_grouped_gemm.log"
|
||||
archiveArtifacts "perf_conv_fwd.log"
|
||||
archiveArtifacts "perf_conv_bwd_data.log"
|
||||
archiveArtifacts "perf_gemm_bilinear.log"
|
||||
archiveArtifacts "perf_reduction.log"
|
||||
archiveArtifacts "perf_splitK_gemm_verify.log"
|
||||
archiveArtifacts "perf_splitK_gemm.log"
|
||||
archiveArtifacts "perf_onnx_gemm.log"
|
||||
// stash perf files to master
|
||||
stash name: "perf_gemm_${gpu_arch}.log"
|
||||
stash name: "perf_resnet50_N256_${gpu_arch}.log"
|
||||
stash name: "perf_resnet50_N4_${gpu_arch}.log"
|
||||
stash name: "perf_batched_gemm_${gpu_arch}.log"
|
||||
stash name: "perf_grouped_gemm_${gpu_arch}.log"
|
||||
stash name: "perf_conv_fwd_${gpu_arch}.log"
|
||||
stash name: "perf_conv_bwd_data_${gpu_arch}.log"
|
||||
stash name: "perf_gemm_bilinear_${gpu_arch}.log"
|
||||
stash name: "perf_reduction_${gpu_arch}.log"
|
||||
stash name: "perf_splitK_gemm_${gpu_arch}.log"
|
||||
stash name: "perf_onnx_gemm_${gpu_arch}.log"
|
||||
stash name: "perf_gemm.log"
|
||||
stash name: "perf_resnet50_N256.log"
|
||||
stash name: "perf_resnet50_N4.log"
|
||||
stash name: "perf_batched_gemm.log"
|
||||
stash name: "perf_grouped_gemm.log"
|
||||
stash name: "perf_conv_fwd.log"
|
||||
stash name: "perf_conv_bwd_data.log"
|
||||
stash name: "perf_gemm_bilinear.log"
|
||||
stash name: "perf_reduction.log"
|
||||
stash name: "perf_splitK_gemm.log"
|
||||
stash name: "perf_onnx_gemm.log"
|
||||
//we will process results on the master node
|
||||
}
|
||||
else{
|
||||
sh "./run_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${gpu_arch} ${env.BRANCH_NAME} ${NODE_NAME}"
|
||||
archiveArtifacts "perf_gemm_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_resnet50_N256_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_resnet50_N4_${gpu_arch}.log"
|
||||
sh "./run_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
|
||||
archiveArtifacts "perf_gemm.log"
|
||||
archiveArtifacts "perf_resnet50_N256.log"
|
||||
archiveArtifacts "perf_resnet50_N4.log"
|
||||
// stash perf files to master
|
||||
stash name: "perf_gemm_${gpu_arch}.log"
|
||||
stash name: "perf_resnet50_N256_${gpu_arch}.log"
|
||||
stash name: "perf_resnet50_N4_${gpu_arch}.log"
|
||||
stash name: "perf_gemm.log"
|
||||
stash name: "perf_resnet50_N256.log"
|
||||
stash name: "perf_resnet50_N4.log"
|
||||
//we will process the results on the master node
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -403,17 +409,104 @@ def runPerfTest(Map conf=[:]){
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Build CK inside the CI docker image, run the tests and examples, and stash
 * the packed ckProfiler binary so later stages can unstash it instead of
 * rebuilding.
 *
 * conf keys read directly here:
 *   "prefixpath"       - passed to docker as the PREFIX build arg (default "/opt/rocm")
 *   "enforce_xnack_on" - when true, HSA_XNACK=1 is exported into the container
 * conf is also forwarded unchanged to getDockerImage() and cmake_build().
 *
 * Returns the image object obtained from getDockerImage(), or the result of
 * docker.build() when the fallback rebuild path was taken.
 */
def Build_CK(Map conf=[:]){
    show_node_info()

    env.HSA_ENABLE_SDMA=0
    checkout scm

    def image = getDockerImageName()
    def prefixpath = conf.get("prefixpath", "/opt/rocm")

    // Expose the GPU device nodes and add both the video and render groups to the container.
    def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
    if (conf.get("enforce_xnack_on", false)) {
        dockerOpts = dockerOpts + " --env HSA_XNACK=1 "
    }
    def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' "
    if (params.COMPILER_VERSION != "release"){
        // Non-release compilers are expected under /llvm-project/build/bin inside the image.
        dockerOpts = dockerOpts + " --env HIP_CLANG_PATH='/llvm-project/build/bin' "
    }

    def variant = env.STAGE_NAME
    def retimage

    gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') {
        try {
            (retimage, image) = getDockerImage(conf)
            // GPU sanity check: the grep matches a clinfo report of zero devices.
            // NOTE(review): runShell is defined elsewhere; presumably it returns true when the command succeeds - confirm.
            withDockerContainer(image: image, args: dockerOpts) {
                timeout(time: 5, unit: 'MINUTES'){
                    sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
                    if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
                        throw new Exception ("GPU not found")
                    }
                    else{
                        echo "GPU is OK"
                    }
                }
            }
        }
        catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){
            // Propagate aborts untouched so a cancelled job does not trigger the rebuild below.
            echo "The job was cancelled or aborted"
            throw e
        }
        catch(Exception ex) {
            // Any other failure (including "GPU not found"): rebuild the image without cache
            // and repeat the GPU check once; a second failure propagates to the caller.
            retimage = docker.build("${image}", dockerArgs + " --no-cache .")
            withDockerContainer(image: image, args: dockerOpts) {
                timeout(time: 5, unit: 'MINUTES'){
                    sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo |tee clinfo.log'
                    if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
                        throw new Exception ("GPU not found")
                    }
                    else{
                        echo "GPU is OK"
                    }
                }
            }
        }

        withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
            timeout(time: 24, unit: 'HOURS')
            {
                cmake_build(conf)
                dir("build"){
                    //run tests and examples
                    sh 'make -j check'
                    //we only need the ckProfiler to run the performance tests, so we pack and stash it
                    sh 'tar -zcvf ckProfiler.tar.gz bin/ckProfiler'
                    // Restrict the stash to the tarball; without "includes" every file in build/ would be stashed.
                    stash name: "ckProfiler.tar.gz", includes: "ckProfiler.tar.gz"
                }
            }
        }
    }
    return retimage
}
|
||||
|
||||
/**
 * Run Build_CK(conf); on failure, log the exception and rethrow it.
 * In every case, call reboot() afterwards unless conf["no_reboot"] is truthy.
 */
def Build_CK_and_Reboot(Map conf=[:]){
    try {
        Build_CK(conf)
    } catch (err) {
        echo "throwing error exception while building CK"
        echo 'Exception occurred: ' + err.toString()
        throw err
    } finally {
        def skipReboot = conf.get("no_reboot", false)
        if (!skipReboot) {
            reboot()
        }
    }
}
|
||||
|
||||
def process_results(Map conf=[:]){
|
||||
env.HSA_ENABLE_SDMA=0
|
||||
checkout scm
|
||||
def image = "composable_kernels_${params.COMPILER_VERSION}"
|
||||
def image = getDockerImageName()
|
||||
def prefixpath = "/opt/rocm"
|
||||
def gpu_arch = conf.get("gpu_arch", "gfx908")
|
||||
|
||||
// Jenkins is complaining about the render group
|
||||
def dockerOpts="--cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
|
||||
if (conf.get("enforce_xnack_on", false)) {
|
||||
dockerOpts = dockerOpts + " --env HSA_XNACK=1 --env GPU_ARCH='${gpu_arch}' "
|
||||
dockerOpts = dockerOpts + " --env HSA_XNACK=1 "
|
||||
}
|
||||
def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg compiler_version='release' "
|
||||
|
||||
@@ -422,7 +515,6 @@ def process_results(Map conf=[:]){
|
||||
|
||||
gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') {
|
||||
try {
|
||||
//retimage = docker.build("${image}", dockerArgs + '.')
|
||||
(retimage, image) = getDockerImage(conf)
|
||||
}
|
||||
catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){
|
||||
@@ -437,25 +529,25 @@ def process_results(Map conf=[:]){
|
||||
dir("script"){
|
||||
if (params.RUN_FULL_QA){
|
||||
// unstash perf files to master
|
||||
unstash "perf_gemm_${gpu_arch}.log"
|
||||
unstash "perf_resnet50_N256_${gpu_arch}.log"
|
||||
unstash "perf_resnet50_N4_${gpu_arch}.log"
|
||||
unstash "perf_batched_gemm_${gpu_arch}.log"
|
||||
unstash "perf_grouped_gemm_${gpu_arch}.log"
|
||||
unstash "perf_conv_fwd_${gpu_arch}.log"
|
||||
unstash "perf_conv_bwd_data_${gpu_arch}.log"
|
||||
unstash "perf_gemm_bilinear_${gpu_arch}.log"
|
||||
unstash "perf_reduction_${gpu_arch}.log"
|
||||
unstash "perf_splitK_gemm_${gpu_arch}.log"
|
||||
unstash "perf_onnx_gemm_${gpu_arch}.log"
|
||||
sh "./process_qa_data.sh ${gpu_arch}"
|
||||
unstash "perf_gemm.log"
|
||||
unstash "perf_resnet50_N256.log"
|
||||
unstash "perf_resnet50_N4.log"
|
||||
unstash "perf_batched_gemm.log"
|
||||
unstash "perf_grouped_gemm.log"
|
||||
unstash "perf_conv_fwd.log"
|
||||
unstash "perf_conv_bwd_data.log"
|
||||
unstash "perf_gemm_bilinear.log"
|
||||
unstash "perf_reduction.log"
|
||||
unstash "perf_splitK_gemm.log"
|
||||
unstash "perf_onnx_gemm.log"
|
||||
sh "./process_qa_data.sh"
|
||||
}
|
||||
else{
|
||||
// unstash perf files to master
|
||||
unstash "perf_gemm_${gpu_arch}.log"
|
||||
unstash "perf_resnet50_N256_${gpu_arch}.log"
|
||||
unstash "perf_resnet50_N4_${gpu_arch}.log"
|
||||
sh "./process_perf_data.sh ${gpu_arch}"
|
||||
unstash "perf_gemm.log"
|
||||
unstash "perf_resnet50_N256.log"
|
||||
unstash "perf_resnet50_N4.log"
|
||||
sh "./process_perf_data.sh"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -562,41 +654,29 @@ pipeline {
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Tests")
|
||||
|
||||
stage("Build CK and run Tests")
|
||||
{
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { !params.TEST_NODE_PERFORMANCE.toBoolean() }
|
||||
}
|
||||
parallel
|
||||
{
|
||||
stage("Run Tests: gfx908")
|
||||
stage("Build CK and run Tests")
|
||||
{
|
||||
agent{ label rocmnode("gfx908")}
|
||||
agent{ label rocmnode("gfx908 || gfx90a") }
|
||||
environment{
|
||||
setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -DBUILD_DEV=On """ : """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3 " -DBUILD_DEV=On """}"
|
||||
setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 --offload-arch=gfx90a -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" """ : """ -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 --offload-arch=gfx90a -O3 " """ }"
|
||||
execute_args = "${params.COMPILER_VERSION == "ck-9110" ? """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ : """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ }"
|
||||
}
|
||||
steps{
|
||||
buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "check", no_reboot:true, build_type: 'Release', gpu_arch: "gfx908")
|
||||
}
|
||||
}
|
||||
stage("Run Tests: gfx90a")
|
||||
{
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { params.RUN_FULL_QA.toBoolean() }
|
||||
}
|
||||
options { retry(2) }
|
||||
agent{ label rocmnode("gfx90a")}
|
||||
environment{
|
||||
setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx90a -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -DBUILD_DEV=On """ : """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx90a -O3 " -DBUILD_DEV=On """}"
|
||||
}
|
||||
steps{
|
||||
buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "check", no_reboot:true, build_type: 'Release', gpu_arch: "gfx90a")
|
||||
Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
//at present this stage only builds binaries.
|
||||
//we will now build all binaries in a separate stage.
|
||||
//once we have some tests to run in this stage, we can enable it again.
|
||||
stage("Client App")
|
||||
{
|
||||
when {
|
||||
@@ -611,7 +691,6 @@ pipeline {
|
||||
environment{
|
||||
setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" """ : """ -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " """ }"
|
||||
execute_args = "${params.COMPILER_VERSION == "ck-9110" ? """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ : """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ }"
|
||||
|
||||
}
|
||||
steps{
|
||||
buildHipClangJobAndReboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
|
||||
@@ -619,23 +698,24 @@ pipeline {
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
stage("Performance Tests")
|
||||
{
|
||||
parallel
|
||||
{
|
||||
stage("Run ckProfiler: gfx908")
|
||||
stage("Run ckProfiler: gfx908 or gfx90a")
|
||||
{
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { !params.RUN_FULL_QA.toBoolean() && !params.TEST_NODE_PERFORMANCE.toBoolean() }
|
||||
}
|
||||
options { retry(2) }
|
||||
agent{ label rocmnode("gfx908")}
|
||||
agent{ label rocmnode("gfx908 || gfx90a")}
|
||||
environment{
|
||||
setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -DBUILD_DEV=On """ : """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3 " -DBUILD_DEV=On """}"
|
||||
}
|
||||
steps{
|
||||
runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release', gpu_arch: "gfx908")
|
||||
runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release')
|
||||
}
|
||||
}
|
||||
stage("Run ckProfiler: gfx90a")
|
||||
@@ -650,7 +730,7 @@ pipeline {
|
||||
setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx90a -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -DBUILD_DEV=On """ : """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx90a -O3 " -DBUILD_DEV=On """}"
|
||||
}
|
||||
steps{
|
||||
runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release', gpu_arch: "gfx90a")
|
||||
runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release')
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -659,24 +739,10 @@ pipeline {
|
||||
{
|
||||
parallel
|
||||
{
|
||||
stage("Process results for gfx908"){
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { !params.RUN_FULL_QA.toBoolean() && !params.TEST_NODE_PERFORMANCE.toBoolean() }
|
||||
}
|
||||
stage("Process results"){
|
||||
agent { label 'mici' }
|
||||
steps{
|
||||
process_results(gpu_arch: "gfx908")
|
||||
}
|
||||
}
|
||||
stage("Process results for gfx90a"){
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { params.RUN_FULL_QA.toBoolean() || params.TEST_NODE_PERFORMANCE.toBoolean() }
|
||||
}
|
||||
agent { label 'mici' }
|
||||
steps{
|
||||
process_results(gpu_arch: "gfx90a")
|
||||
process_results()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
ROCmSoftwarePlatform/rocm-recipes
|
||||
# 1.90+
|
||||
danmar/cppcheck@dd05839a7e63ef04afd34711cb3e1e0ef742882f
|
||||
@@ -2,15 +2,14 @@
|
||||
#
|
||||
# in order to run this script you'd need the following python packages:
|
||||
|
||||
pip3 install --upgrade pip
|
||||
pip3 install sqlalchemy pymysql pandas sshtunnel
|
||||
#pip3 install --upgrade pip
|
||||
#pip3 install sqlalchemy pymysql pandas sshtunnel
|
||||
|
||||
# you would also need to set up some environment variables in order to
|
||||
# post your new test results to the database and compare them to the baseline
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
|
||||
#process results
|
||||
gpu_arch=$1
|
||||
python3 process_perf_data.py perf_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_gemm.log
|
||||
python3 process_perf_data.py perf_resnet50_N256.log
|
||||
python3 process_perf_data.py perf_resnet50_N4.log
|
||||
|
||||
@@ -10,15 +10,14 @@
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
|
||||
#process results
|
||||
gpu_arch=$1
|
||||
python3 process_perf_data.py perf_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_batched_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_grouped_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_conv_fwd_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_conv_bwd_data_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_gemm_bilinear_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_reduction_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_splitK_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_onnx_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_gemm.log
|
||||
python3 process_perf_data.py perf_resnet50_N256.log
|
||||
python3 process_perf_data.py perf_resnet50_N4.log
|
||||
python3 process_perf_data.py perf_batched_gemm.log
|
||||
python3 process_perf_data.py perf_grouped_gemm.log
|
||||
python3 process_perf_data.py perf_conv_fwd.log
|
||||
python3 process_perf_data.py perf_conv_bwd_data.log
|
||||
python3 process_perf_data.py perf_gemm_bilinear.log
|
||||
python3 process_perf_data.py perf_reduction.log
|
||||
python3 process_perf_data.py perf_splitK_gemm.log
|
||||
python3 process_perf_data.py perf_onnx_gemm.log
|
||||
|
||||
@@ -5,12 +5,11 @@
|
||||
# post your new test results to the database and compare them to the baseline
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
#
|
||||
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <gpu_arch> <branch name> < node name>
|
||||
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>"
|
||||
# input arguments:
|
||||
# verification = 0 : do not verify result correctness on CPU
|
||||
# = 1 : verify correctness on CPU (may take a long time)
|
||||
# environment tag : a string describing the specifics of your test environment
|
||||
# gpu_arch : a string for GPU architecture, e.g. "gfx908" or "gfx90a".
|
||||
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
|
||||
# node name : $hostname
|
||||
|
||||
@@ -19,11 +18,9 @@ export verify=$1
|
||||
echo 'Verification: ' $verify
|
||||
export env_type=$2
|
||||
echo 'Environment type: ' $env_type
|
||||
export gpu_arch=$3
|
||||
echo 'GPU architecture: ' $gpu_arch
|
||||
export branch=$4
|
||||
export branch=$3
|
||||
echo 'Branch name: ' $branch
|
||||
export host_name=$5
|
||||
export host_name=$4
|
||||
echo 'Host name: ' $host_name
|
||||
function print_log_header(){
|
||||
rm -f $1;
|
||||
@@ -38,7 +35,7 @@ function print_log_header(){
|
||||
}
|
||||
|
||||
#run gemm tests
|
||||
export gemm_log="perf_gemm_${gpu_arch}.log"
|
||||
export gemm_log="perf_gemm.log"
|
||||
print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
|
||||
@@ -58,7 +55,7 @@ print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 1 2>&1 | tee -a $gemm_log
|
||||
|
||||
#run batched_gemm tests
|
||||
export batched_gemm_log="perf_batched_gemm_${gpu_arch}.log"
|
||||
export batched_gemm_log="perf_batched_gemm.log"
|
||||
print_log_header $batched_gemm_log $env_type $branch $host_name
|
||||
./profile_batched_gemm.sh batched_gemm 0 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
|
||||
@@ -78,7 +75,7 @@ print_log_header $batched_gemm_log $env_type $branch $host_name
|
||||
./profile_batched_gemm.sh batched_gemm 3 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
|
||||
|
||||
#run grouped_gemm tests
|
||||
export grouped_gemm_log="perf_grouped_gemm_${gpu_arch}.log"
|
||||
export grouped_gemm_log="perf_grouped_gemm.log"
|
||||
print_log_header $grouped_gemm_log $env_type $branch $host_name
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
|
||||
@@ -86,7 +83,7 @@ print_log_header $grouped_gemm_log $env_type $branch $host_name
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
|
||||
|
||||
#run GEMM+Bilinear tests
|
||||
export gemm_bilinear_log="perf_gemm_bilinear_${gpu_arch}.log"
|
||||
export gemm_bilinear_log="perf_gemm_bilinear.log"
|
||||
print_log_header $gemm_bilinear_log $env_type $branch $host_name
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
|
||||
@@ -94,7 +91,7 @@ print_log_header $gemm_bilinear_log $env_type $branch $host_name
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
|
||||
|
||||
#run conv_fwd tests
|
||||
export conv_fwd_log="perf_conv_fwd_${gpu_arch}.log"
|
||||
export conv_fwd_log="perf_conv_fwd.log"
|
||||
print_log_header $conv_fwd_log $env_type $branch $host_name
|
||||
./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
@@ -102,7 +99,7 @@ print_log_header $conv_fwd_log $env_type $branch $host_name
|
||||
./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
|
||||
#run conv_bwd_data tests
|
||||
export conv_bwd_data_log="perf_conv_bwd_data_${gpu_arch}.log"
|
||||
export conv_bwd_data_log="perf_conv_bwd_data.log"
|
||||
print_log_header $conv_bwd_data_log $env_type $branch $host_name
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
@@ -110,33 +107,43 @@ print_log_header $conv_bwd_data_log $env_type $branch $host_name
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
|
||||
export resnet256_log="perf_resnet50_N256.log"
|
||||
print_log_header $resnet256_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 2>&1 | tee -a $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
|
||||
export resnet4_log="perf_resnet50_N4.log"
|
||||
print_log_header $resnet4_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 2>&1 | tee -a $resnet4_log
|
||||
|
||||
#run reduction tests
|
||||
export reduction_log="perf_reduction_${gpu_arch}.log"
|
||||
export reduction_log="perf_reduction.log"
|
||||
print_log_header $reduction_log $env_type $branch $host_name
|
||||
./profile_reduce_with_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log
|
||||
./profile_reduce_no_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log
|
||||
|
||||
#run splitK_gemm tests
|
||||
export splitK_gemm_log="perf_splitK_gemm_${gpu_arch}.log"
|
||||
#run splitK_gemm tests, first correctness verification, then performance
|
||||
export splitK_gemm_ver_log="perf_splitK_gemm_verify.log"
|
||||
print_log_header $splitK_gemm_ver_log $env_type $branch $host_name
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
export splitK_gemm_log="perf_splitK_gemm.log"
|
||||
print_log_header $splitK_gemm_log $env_type $branch $host_name
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
|
||||
#run ONNX gemm tests
|
||||
export onnx_log="perf_onnx_gemm_${gpu_arch}.log"
|
||||
export onnx_log="perf_onnx_gemm.log"
|
||||
print_log_header $onnx_log $env_type $branch $host_name
|
||||
./profile_onnx_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
|
||||
./profile_onnx_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
|
||||
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <gpu_arch> <branch name> <node name>"
|
||||
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>"
|
||||
# input arguments:
|
||||
# verification = 0 : do not verify result correctness on CPU
|
||||
# = 1 : verify correctness on CPU (may take a long time)
|
||||
# environment tag : a string describing the specifics of your test environment
|
||||
# gpu_arch : a string for GPU architecture, e.g. "gfx908" or "gfx90a".
|
||||
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
|
||||
# node name : $hostname
|
||||
|
||||
@@ -15,11 +14,9 @@ export verify=$1
|
||||
echo 'Verification: ' $verify
|
||||
export env_type=$2
|
||||
echo 'Environment type: ' $env_type
|
||||
export gpu_arch=$3
|
||||
echo 'GPU architecture: ' $gpu_arch
|
||||
export branch=$4
|
||||
export branch=$3
|
||||
echo 'Branch name: ' $branch
|
||||
export host_name=$5
|
||||
export host_name=$4
|
||||
echo 'Host name: ' $host_name
|
||||
|
||||
function print_log_header(){
|
||||
@@ -35,7 +32,7 @@ function print_log_header(){
|
||||
}
|
||||
|
||||
#run gemm tests
|
||||
export gemm_log="perf_gemm_${gpu_arch}.log"
|
||||
export gemm_log="perf_gemm.log"
|
||||
print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 0 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
@@ -55,9 +52,9 @@ print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
|
||||
export resnet256_log="perf_resnet50_N256.log"
|
||||
print_log_header $resnet256_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 | tee -a $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
|
||||
export resnet4_log="perf_resnet50_N4.log"
|
||||
print_log_header $resnet4_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 | tee -a $resnet4_log
|
||||
|
||||
Reference in New Issue
Block a user