diff --git a/Dockerfile b/Dockerfile index bcae24647d..59a6a60453 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,8 @@ RUN apt-get install -y wget gnupg RUN wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO ubuntu main > /etc/apt/sources.list.d/rocm.list" RUN wget --no-check-certificate -qO - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add - -RUN sh -c "echo deb https://apt.kitware.com/ubuntu/ bionic main | tee -a /etc/apt/sources.list" +#RUN sh -c "echo deb https://apt.kitware.com/ubuntu/ bionic main | tee -a /etc/apt/sources.list" +RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list" # Install dependencies RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ @@ -68,7 +69,6 @@ ENV UBSAN_OPTIONS=print_stacktrace=1 ENV LC_ALL=C.UTF-8 ENV LANG=C.UTF-8 -ADD dev-requirements.txt dev-requirements.txt RUN groupadd -f render # Install the new rocm-cmake version diff --git a/Jenkinsfile b/Jenkinsfile index 8440c2f1dd..62f53e04c2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -42,7 +42,6 @@ def build_compiler(){ def getDockerImage(Map conf=[:]){ env.DOCKER_BUILDKIT=1 def prefixpath = conf.get("prefixpath", "/opt/rocm") // prefix:/opt/rocm - def gpu_arch = conf.get("gpu_arch", "gfx908") // prebuilt dockers should have all the architectures enabled so one image can be used for all stages def no_cache = conf.get("no_cache", false) def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' " if(env.CCACHE_HOST) @@ -154,6 +153,10 @@ def cmake_build(Map conf=[:]){ }else{ setup_args = " -DCMAKE_BUILD_TYPE=release" + setup_args } + if(env.CCACHE_HOST) + { + setup_args = " -DCMAKE_CXX_COMPILER_LAUNCHER='ccache' -DCMAKE_C_COMPILER_LAUNCHER='ccache' " + setup_args + } def pre_setup_cmd = """ echo \$HSA_ENABLE_SDMA @@ -191,15 +194,13 @@ def buildHipClangJob(Map conf=[:]){ env.HSA_ENABLE_SDMA=0 checkout scm - def image = "composable_kernels_${params.COMPILER_VERSION}" + def image = getDockerImageName() def prefixpath = conf.get("prefixpath", "/opt/rocm") - def gpu_arch = conf.get("gpu_arch", "gfx908") // Jenkins is complaining about the render group - // def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined" - def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined" + def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined" if (conf.get("enforce_xnack_on", false)) { - dockerOpts = dockerOpts + " --env HSA_XNACK=1 --env GPU_ARCH='${gpu_arch}' " + dockerOpts = dockerOpts + " --env HSA_XNACK=1 " } def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' " if (params.COMPILER_VERSION != "release"){ @@ -281,16 +282,13 @@ def runCKProfiler(Map conf=[:]){ env.HSA_ENABLE_SDMA=0 checkout scm - - def image = "composable_kernels_${params.COMPILER_VERSION}" + def image = getDockerImageName() def prefixpath = conf.get("prefixpath", "/opt/rocm") - def gpu_arch = conf.get("gpu_arch", "gfx908") // Jenkins is complaining about the render group - // def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined" - def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined" + def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined" if (conf.get("enforce_xnack_on", false)) { - dockerOpts = dockerOpts + " --env HSA_XNACK=1 --env GPU_ARCH='${gpu_arch}' " + dockerOpts = dockerOpts + " --env HSA_XNACK=1 " } def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' " if (params.COMPILER_VERSION != "release"){ @@ -302,7 +300,6 @@ def runCKProfiler(Map conf=[:]){ gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') { try { - //retimage = docker.build("${image}", dockerArgs + '.') (retimage, image) = getDockerImage(conf) withDockerContainer(image: image, args: dockerOpts) { timeout(time: 5, unit: 'MINUTES'){ @@ -338,48 +335,57 @@ def runCKProfiler(Map conf=[:]){ withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') { timeout(time: 24, unit: 'HOURS') { - cmake_build(conf) + //cmake_build(conf) + //instead of building, just unstash the ckProfiler and install it + sh """ + rm -rf build + mkdir build + """ + dir("build"){ + unstash 'ckProfiler.tar.gz' + sh 'tar -xvf ckProfiler.tar.gz' + } + dir("script"){ if (params.RUN_FULL_QA){ - def qa_log = "qa_${gpu_arch}.log" - sh "./run_full_performance_tests.sh 1 QA_${params.COMPILER_VERSION} ${gpu_arch} ${env.BRANCH_NAME} ${NODE_NAME}" - archiveArtifacts "perf_gemm_${gpu_arch}.log" - archiveArtifacts "perf_resnet50_N256_${gpu_arch}.log" - archiveArtifacts "perf_resnet50_N4_${gpu_arch}.log" - archiveArtifacts "perf_batched_gemm_${gpu_arch}.log" - archiveArtifacts "perf_grouped_gemm_${gpu_arch}.log" - archiveArtifacts "perf_conv_fwd_${gpu_arch}.log" - archiveArtifacts "perf_conv_bwd_data_${gpu_arch}.log" - archiveArtifacts "perf_gemm_bilinear_${gpu_arch}.log" - archiveArtifacts "perf_reduction_${gpu_arch}.log" - archiveArtifacts "perf_splitK_gemm_${gpu_arch}.log" - archiveArtifacts "perf_onnx_gemm_${gpu_arch}.log" + sh "./run_full_performance_tests.sh 1 QA_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}" + archiveArtifacts "perf_gemm.log" + archiveArtifacts "perf_resnet50_N256.log" + archiveArtifacts "perf_resnet50_N4.log" + archiveArtifacts "perf_batched_gemm.log" + archiveArtifacts "perf_grouped_gemm.log" + archiveArtifacts "perf_conv_fwd.log" + archiveArtifacts "perf_conv_bwd_data.log" + archiveArtifacts "perf_gemm_bilinear.log" + archiveArtifacts "perf_reduction.log" + archiveArtifacts "perf_splitK_gemm_verify.log" + archiveArtifacts "perf_splitK_gemm.log" + archiveArtifacts "perf_onnx_gemm.log" // stash perf files to master - stash name: "perf_gemm_${gpu_arch}.log" - stash name: "perf_resnet50_N256_${gpu_arch}.log" - stash name: "perf_resnet50_N4_${gpu_arch}.log" - stash name: "perf_batched_gemm_${gpu_arch}.log" - stash name: "perf_grouped_gemm_${gpu_arch}.log" - stash name: "perf_conv_fwd_${gpu_arch}.log" - stash name: "perf_conv_bwd_data_${gpu_arch}.log" - stash name: "perf_gemm_bilinear_${gpu_arch}.log" - stash name: "perf_reduction_${gpu_arch}.log" - stash name: "perf_splitK_gemm_${gpu_arch}.log" - stash name: "perf_onnx_gemm_${gpu_arch}.log" + stash name: "perf_gemm.log" + stash name: "perf_resnet50_N256.log" + stash name: "perf_resnet50_N4.log" + stash name: "perf_batched_gemm.log" + stash name: "perf_grouped_gemm.log" + stash name: "perf_conv_fwd.log" + stash name: "perf_conv_bwd_data.log" + stash name: "perf_gemm_bilinear.log" + stash name: "perf_reduction.log" + stash name: "perf_splitK_gemm.log" + stash name: "perf_onnx_gemm.log" //we will process results on the master node } else{ - sh "./run_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${gpu_arch} ${env.BRANCH_NAME} ${NODE_NAME}" - archiveArtifacts "perf_gemm_${gpu_arch}.log" - archiveArtifacts "perf_resnet50_N256_${gpu_arch}.log" - archiveArtifacts "perf_resnet50_N4_${gpu_arch}.log" + sh "./run_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}" + archiveArtifacts "perf_gemm.log" + archiveArtifacts "perf_resnet50_N256.log" + archiveArtifacts "perf_resnet50_N4.log" // stash perf files to master - stash name: "perf_gemm_${gpu_arch}.log" - stash name: "perf_resnet50_N256_${gpu_arch}.log" - stash name: "perf_resnet50_N4_${gpu_arch}.log" + stash name: "perf_gemm.log" + stash name: "perf_resnet50_N256.log" + stash name: "perf_resnet50_N4.log" //we will process the results on the master node } - } } } @@ -403,17 +409,104 @@ def runPerfTest(Map conf=[:]){ } } +def Build_CK(Map conf=[:]){ + show_node_info() + + env.HSA_ENABLE_SDMA=0 + checkout scm + + def image = getDockerImageName() + def prefixpath = conf.get("prefixpath", "/opt/rocm") + + // Jenkins is complaining about the render group + def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined" + if (conf.get("enforce_xnack_on", false)) { + dockerOpts = dockerOpts + " --env HSA_XNACK=1 " + } + def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' " + if (params.COMPILER_VERSION != "release"){ + dockerOpts = dockerOpts + " --env HIP_CLANG_PATH='/llvm-project/build/bin' " + } + + def variant = env.STAGE_NAME + def retimage + + gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') { + try { + (retimage, image) = getDockerImage(conf) + withDockerContainer(image: image, args: dockerOpts) { + timeout(time: 5, unit: 'MINUTES'){ + sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log' + if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ + throw new Exception ("GPU not found") + } + else{ + echo "GPU is OK" + } + } + } + } + catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){ + echo "The job was cancelled or aborted" + throw e + } + catch(Exception ex) { + retimage = docker.build("${image}", dockerArgs + " --no-cache .") + withDockerContainer(image: image, args: dockerOpts) { + timeout(time: 5, unit: 'MINUTES'){ + sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo |tee clinfo.log' + if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ + throw new Exception ("GPU not found") + } + else{ + echo "GPU is OK" + } + } + } + } + withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') { + timeout(time: 24, unit: 'HOURS') + { + cmake_build(conf) + dir("build"){ + //run tests and examples + sh 'make -j check' + //we only need the ckProfiler to run the performance tests, so we pack and stash it + sh 'tar -zcvf ckProfiler.tar.gz bin/ckProfiler' + stash "ckProfiler.tar.gz" + } + } + } + } + return retimage +} + +def Build_CK_and_Reboot(Map conf=[:]){ + try{ + Build_CK(conf) + } + catch(e){ + echo "throwing error exception while building CK" + echo 'Exception occurred: ' + e.toString() + throw e + } + finally{ + if (!conf.get("no_reboot", false)) { + reboot() + } + } +} + def process_results(Map conf=[:]){ env.HSA_ENABLE_SDMA=0 checkout scm - def image = "composable_kernels_${params.COMPILER_VERSION}" + def image = getDockerImageName() def prefixpath = "/opt/rocm" - def gpu_arch = conf.get("gpu_arch", "gfx908") // Jenkins is complaining about the render group def dockerOpts="--cap-add=SYS_PTRACE --security-opt seccomp=unconfined" if (conf.get("enforce_xnack_on", false)) { - dockerOpts = dockerOpts + " --env HSA_XNACK=1 --env GPU_ARCH='${gpu_arch}' " + dockerOpts = dockerOpts + " --env HSA_XNACK=1 " } def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg compiler_version='release' " @@ -422,7 +515,6 @@ def process_results(Map conf=[:]){ gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') { try { - //retimage = docker.build("${image}", dockerArgs + '.') (retimage, image) = getDockerImage(conf) } catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){ @@ -437,25 +529,25 @@ def process_results(Map conf=[:]){ dir("script"){ if (params.RUN_FULL_QA){ // unstash perf files to master - unstash "perf_gemm_${gpu_arch}.log" - unstash "perf_resnet50_N256_${gpu_arch}.log" - unstash "perf_resnet50_N4_${gpu_arch}.log" - unstash "perf_batched_gemm_${gpu_arch}.log" - unstash "perf_grouped_gemm_${gpu_arch}.log" - unstash "perf_conv_fwd_${gpu_arch}.log" - unstash "perf_conv_bwd_data_${gpu_arch}.log" - unstash "perf_gemm_bilinear_${gpu_arch}.log" - unstash "perf_reduction_${gpu_arch}.log" - unstash "perf_splitK_gemm_${gpu_arch}.log" - unstash "perf_onnx_gemm_${gpu_arch}.log" - sh "./process_qa_data.sh ${gpu_arch}" + unstash "perf_gemm.log" + unstash "perf_resnet50_N256.log" + unstash "perf_resnet50_N4.log" + unstash "perf_batched_gemm.log" + unstash "perf_grouped_gemm.log" + unstash "perf_conv_fwd.log" + unstash "perf_conv_bwd_data.log" + unstash "perf_gemm_bilinear.log" + unstash "perf_reduction.log" + unstash "perf_splitK_gemm.log" + unstash "perf_onnx_gemm.log" + sh "./process_qa_data.sh" } else{ // unstash perf files to master - unstash "perf_gemm_${gpu_arch}.log" - unstash "perf_resnet50_N256_${gpu_arch}.log" - unstash "perf_resnet50_N4_${gpu_arch}.log" - sh "./process_perf_data.sh ${gpu_arch}" + unstash "perf_gemm.log" + unstash "perf_resnet50_N256.log" + unstash "perf_resnet50_N4.log" + sh "./process_perf_data.sh" } } } @@ -562,41 +654,29 @@ pipeline { } } } - stage("Tests") + + stage("Build CK and run Tests") { - when { - beforeAgent true - expression { !params.TEST_NODE_PERFORMANCE.toBoolean() } - } parallel { - stage("Run Tests: gfx908") + stage("Build CK and run Tests") { - agent{ label rocmnode("gfx908")} + agent{ label rocmnode("gfx908 || gfx90a") } environment{ - setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -DBUILD_DEV=On """ : """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3 " -DBUILD_DEV=On """}" + setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 --offload-arch=gfx90a -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" """ : """ -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 --offload-arch=gfx90a -O3 " """ }" + execute_args = "${params.COMPILER_VERSION == "ck-9110" ? """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ : """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ }" } steps{ - buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "check", no_reboot:true, build_type: 'Release', gpu_arch: "gfx908") - } - } - stage("Run Tests: gfx90a") - { - when { - beforeAgent true - expression { params.RUN_FULL_QA.toBoolean() } - } - options { retry(2) } - agent{ label rocmnode("gfx90a")} - environment{ - setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx90a -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -DBUILD_DEV=On """ : """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx90a -O3 " -DBUILD_DEV=On """}" - } - steps{ - buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "check", no_reboot:true, build_type: 'Release', gpu_arch: "gfx90a") + Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local') } } } } + + /* + //at present this stage only builds binaries. + //we will now build all binaries in a separate stage. + //once we have some tests to run in this stage, we can enable it again. stage("Client App") { when { @@ -611,7 +691,6 @@ pipeline { environment{ setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" """ : """ -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " """ }" execute_args = "${params.COMPILER_VERSION == "ck-9110" ? """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ : """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ }" - } steps{ buildHipClangJobAndReboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local') @@ -619,23 +698,24 @@ pipeline { } } } + */ stage("Performance Tests") { parallel { - stage("Run ckProfiler: gfx908") + stage("Run ckProfiler: gfx908 or gfx90a") { when { beforeAgent true expression { !params.RUN_FULL_QA.toBoolean() && !params.TEST_NODE_PERFORMANCE.toBoolean() } } options { retry(2) } - agent{ label rocmnode("gfx908")} + agent{ label rocmnode("gfx908 || gfx90a")} environment{ setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -DBUILD_DEV=On """ : """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3 " -DBUILD_DEV=On """}" } steps{ - runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release', gpu_arch: "gfx908") + runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release') } } stage("Run ckProfiler: gfx90a") @@ -650,7 +730,7 @@ pipeline { setup_args = "${params.COMPILER_VERSION == "ck-9110" ? """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx90a -O3 -Xclang -mlink-builtin-bitcode -Xclang /opt/rocm/amdgcn/bitcode/oclc_abi_version_400.bc" -DBUILD_DEV=On """ : """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx90a -O3 " -DBUILD_DEV=On """}" } steps{ - runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release', gpu_arch: "gfx90a") + runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release') } } } @@ -659,24 +739,10 @@ pipeline { { parallel { - stage("Process results for gfx908"){ - when { - beforeAgent true - expression { !params.RUN_FULL_QA.toBoolean() && !params.TEST_NODE_PERFORMANCE.toBoolean() } - } + stage("Process results"){ agent { label 'mici' } steps{ - process_results(gpu_arch: "gfx908") - } - } - stage("Process results for gfx90a"){ - when { - beforeAgent true - expression { params.RUN_FULL_QA.toBoolean() || params.TEST_NODE_PERFORMANCE.toBoolean() } - } - agent { label 'mici' } - steps{ - process_results(gpu_arch: "gfx90a") + process_results() } } } diff --git a/dev-requirements.txt b/dev-requirements.txt index 5d123edb85..9134ecebe1 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,3 +1,2 @@ ROCmSoftwarePlatform/rocm-recipes # 1.90+ -danmar/cppcheck@dd05839a7e63ef04afd34711cb3e1e0ef742882f \ No newline at end of file diff --git a/script/process_perf_data.sh b/script/process_perf_data.sh index b68a7c1b2f..15fc5cb15f 100755 --- a/script/process_perf_data.sh +++ b/script/process_perf_data.sh @@ -2,15 +2,14 @@ # # in order to run this script you'd need the following python packages: -pip3 install --upgrade pip -pip3 install sqlalchemy pymysql pandas sshtunnel +#pip3 install --upgrade pip +#pip3 install sqlalchemy pymysql pandas sshtunnel # you would also need to set up some environment variables in order to # post your new test results to the database and compare them to the baseline # please contact Illia.Silin@amd.com for more details #process results -gpu_arch=$1 -python3 process_perf_data.py perf_gemm_"$gpu_arch".log -python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log -python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log +python3 process_perf_data.py perf_gemm.log +python3 process_perf_data.py perf_resnet50_N256.log +python3 process_perf_data.py perf_resnet50_N4.log diff --git a/script/process_qa_data.sh b/script/process_qa_data.sh index 917305e916..abf1e6234e 100755 --- a/script/process_qa_data.sh +++ b/script/process_qa_data.sh @@ -10,15 +10,14 @@ # please contact Illia.Silin@amd.com for more details #process results -gpu_arch=$1 -python3 process_perf_data.py perf_gemm_"$gpu_arch".log -python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log -python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log -python3 process_perf_data.py perf_batched_gemm_"$gpu_arch".log -python3 process_perf_data.py perf_grouped_gemm_"$gpu_arch".log -python3 process_perf_data.py perf_conv_fwd_"$gpu_arch".log -python3 process_perf_data.py perf_conv_bwd_data_"$gpu_arch".log -python3 process_perf_data.py perf_gemm_bilinear_"$gpu_arch".log -python3 process_perf_data.py perf_reduction_"$gpu_arch".log -python3 process_perf_data.py perf_splitK_gemm_"$gpu_arch".log -python3 process_perf_data.py perf_onnx_gemm_"$gpu_arch".log +python3 process_perf_data.py perf_gemm.log +python3 process_perf_data.py perf_resnet50_N256.log +python3 process_perf_data.py perf_resnet50_N4.log +python3 process_perf_data.py perf_batched_gemm.log +python3 process_perf_data.py perf_grouped_gemm.log +python3 process_perf_data.py perf_conv_fwd.log +python3 process_perf_data.py perf_conv_bwd_data.log +python3 process_perf_data.py perf_gemm_bilinear.log +python3 process_perf_data.py perf_reduction.log +python3 process_perf_data.py perf_splitK_gemm.log +python3 process_perf_data.py perf_onnx_gemm.log diff --git a/script/run_full_performance_tests.sh b/script/run_full_performance_tests.sh index 1626b7f28d..eae334ae2d 100755 --- a/script/run_full_performance_tests.sh +++ b/script/run_full_performance_tests.sh @@ -5,12 +5,11 @@ # post your new test results to the database and compare them to the baseline # please contact Illia.Silin@amd.com for more details # -# run the script as "./run_full_performance_tests.sh < node name> +# run the script as "./run_full_performance_tests.sh < node name> # input arguments: # verification = 0 : do not verify result correctness on CPU # = 1 : verifuy correctness on CPU (may take a long time) # environment tag : a string describing the specifics of your test environment -# gpu_arch : a string for GPU architecture, e.g. "gfx908" or "gfx90a". # branch name : name of the branch in git repo (git status | grep -e 'On branch') # node name : $hostname @@ -19,11 +18,9 @@ export verify=$1 echo 'Verification: ' $verify export env_type=$2 echo 'Environment type: ' $env_type -export gpu_arch=$3 -echo 'GPU architecture: ' $gpu_arch -export branch=$4 +export branch=$3 echo 'Branch name: ' $branch -export host_name=$5 +export host_name=$4 echo 'Host name: ' $host_name function print_log_header(){ rm -f $1; @@ -38,7 +35,7 @@ function print_log_header(){ } #run gemm tests -export gemm_log="perf_gemm_${gpu_arch}.log" +export gemm_log="perf_gemm.log" print_log_header $gemm_log $env_type $branch $host_name ./profile_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_log ./profile_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_log @@ -58,7 +55,7 @@ print_log_header $gemm_log $env_type $branch $host_name ./profile_gemm.sh gemm 3 3 $verify 1 0 1 2>&1 | tee -a $gemm_log #run batched_gemm tests -export batched_gemm_log="perf_batched_gemm_${gpu_arch}.log" +export batched_gemm_log="perf_batched_gemm.log" print_log_header $batched_gemm_log $env_type $branch $host_name ./profile_batched_gemm.sh batched_gemm 0 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 0 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log @@ -78,7 +75,7 @@ print_log_header $batched_gemm_log $env_type $branch $host_name ./profile_batched_gemm.sh batched_gemm 3 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log #run grouped_gemm tests -export grouped_gemm_log="perf_grouped_gemm_${gpu_arch}.log" +export grouped_gemm_log="perf_grouped_gemm.log" print_log_header $grouped_gemm_log $env_type $branch $host_name ./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log @@ -86,7 +83,7 @@ print_log_header $grouped_gemm_log $env_type $branch $host_name ./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log #run GEMM+Bilinear tests -export gemm_bilinear_log="perf_gemm_bilinear_${gpu_arch}.log" +export gemm_bilinear_log="perf_gemm_bilinear.log" print_log_header $gemm_bilinear_log $env_type $branch $host_name ./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log ./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log @@ -94,7 +91,7 @@ print_log_header $gemm_bilinear_log $env_type $branch $host_name ./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log #run conv_fwd tests -export conv_fwd_log="perf_conv_fwd_${gpu_arch}.log" +export conv_fwd_log="perf_conv_fwd.log" print_log_header $conv_fwd_log $env_type $branch $host_name ./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log ./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log @@ -102,7 +99,7 @@ print_log_header $conv_fwd_log $env_type $branch $host_name ./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log #run conv_bwd_data tests -export conv_bwd_data_log="perf_conv_bwd_data_${gpu_arch}.log" +export conv_bwd_data_log="perf_conv_bwd_data.log" print_log_header $conv_bwd_data_log $env_type $branch $host_name ./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log ./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log @@ -110,33 +107,43 @@ print_log_header $conv_bwd_data_log $env_type $branch $host_name ./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log #run resnet50 tests -export resnet256_log="perf_resnet50_N256_${gpu_arch}.log" +export resnet256_log="perf_resnet50_N256.log" print_log_header $resnet256_log $env_type $branch $host_name ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 2>&1 | tee -a $resnet256_log -export resnet4_log="perf_resnet50_N4_${gpu_arch}.log" +export resnet4_log="perf_resnet50_N4.log" print_log_header $resnet4_log $env_type $branch $host_name ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 2>&1 | tee -a $resnet4_log #run reduction tests -export reduction_log="perf_reduction_${gpu_arch}.log" +export reduction_log="perf_reduction.log" print_log_header $reduction_log $env_type $branch $host_name ./profile_reduce_with_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log ./profile_reduce_no_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log -#run splitK_gemm tests -export splitK_gemm_log="perf_splitK_gemm_${gpu_arch}.log" +#run splitK_gemm tests, first correctness verification, then performance +export splitK_gemm_ver_log="perf_splitK_gemm_verify.log" +print_log_header $splitK_gemm_ver_log $env_type $branch $host_name +./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log +./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log +./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log +./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log +./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log +./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log +./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log +./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log +export splitK_gemm_log="perf_splitK_gemm.log" print_log_header $splitK_gemm_log $env_type $branch $host_name -./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log -./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log -./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log -./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log -./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log -./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log -./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log -./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log +./profile_splitK_gemm.sh gemm_splitk 0 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log +./profile_splitK_gemm.sh gemm_splitk 0 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log +./profile_splitK_gemm.sh gemm_splitk 0 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log +./profile_splitK_gemm.sh gemm_splitk 0 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log +./profile_splitK_gemm.sh gemm_splitk 1 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log +./profile_splitK_gemm.sh gemm_splitk 1 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log +./profile_splitK_gemm.sh gemm_splitk 1 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log +./profile_splitK_gemm.sh gemm_splitk 1 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log #run ONNX gemm tests -export onnx_log="perf_onnx_gemm_${gpu_arch}.log" +export onnx_log="perf_onnx_gemm.log" print_log_header $onnx_log $env_type $branch $host_name ./profile_onnx_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $onnx_log ./profile_onnx_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $onnx_log diff --git a/script/run_performance_tests.sh b/script/run_performance_tests.sh index f8ec2cbe49..4e3a6fc8eb 100755 --- a/script/run_performance_tests.sh +++ b/script/run_performance_tests.sh @@ -1,12 +1,11 @@ #!/bin/bash # # in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/ -# run the script as "./run_performance_tests.sh < node name> +# run the script as "./run_performance_tests.sh < node name> # input arguments: # verification = 0 : do not verify result correctness on CPU # = 1 : verify correctness on CPU (may take a long time) # environment tag : a string describing the specifics of your test environment -# gpu_arch : a string for GPU architecture, e.g. "gfx908" or "gfx90a". # branch name : name of the branch in git repo (git status | grep -e 'On branch') # node name : $hostname @@ -15,11 +14,9 @@ export verify=$1 echo 'Verification: ' $verify export env_type=$2 echo 'Environment type: ' $env_type -export gpu_arch=$3 -echo 'GPU architecture: ' $gpu_arch -export branch=$4 +export branch=$3 echo 'Branch name: ' $branch -export host_name=$5 +export host_name=$4 echo 'Host name: ' $host_name function print_log_header(){ @@ -35,7 +32,7 @@ function print_log_header(){ } #run gemm tests -export gemm_log="perf_gemm_${gpu_arch}.log" +export gemm_log="perf_gemm.log" print_log_header $gemm_log $env_type $branch $host_name ./profile_gemm.sh gemm 0 0 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 1 0 $verify 1 0 1 | tee -a $gemm_log @@ -55,9 +52,9 @@ print_log_header $gemm_log $env_type $branch $host_name ./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log #run resnet50 tests -export resnet256_log="perf_resnet50_N256_${gpu_arch}.log" +export resnet256_log="perf_resnet50_N256.log" print_log_header $resnet256_log $env_type $branch $host_name ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 | tee -a $resnet256_log -export resnet4_log="perf_resnet50_N4_${gpu_arch}.log" +export resnet4_log="perf_resnet50_N4.log" print_log_header $resnet4_log $env_type $branch $host_name ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 | tee -a $resnet4_log