mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Re-enable the performance tracking in CI. (#1203)
* test CK with ROCm 6.1 RC2
* add docker credentials for pull
* update the performance db name
* use environment variable for db name
* add rocm-llvm-dev package to ck docker
* turn off verification for daily performance runs
* do not stash ckProfiler on MI300 node
* add processing of mixed GEMMs to QA; fix parsing of splitK GEMM logs
* fix the splitk gemm log file name
* turn timing on for the splitK GEMM performance runs
[ROCm/composable_kernel commit: bdcd037428]
This commit is contained in:
19
Dockerfile
19
Dockerfile
@@ -16,17 +16,17 @@ RUN apt-get install -y --allow-unauthenticated apt-utils wget gnupg2 curl
|
||||
ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
|
||||
RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg
|
||||
|
||||
RUN if [ "$ROCMVERSION" != "6.0.1" ]; then \
|
||||
RUN if [ "$ROCMVERSION" != "6.1" ]; then \
|
||||
sh -c "wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/focal/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate" && \
|
||||
apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ./amdgpu-install_6.0.60000-1_all.deb && \
|
||||
wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \
|
||||
sh -c "echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] $DEB_ROCM_REPO focal main > /etc/apt/sources.list.d/rocm.list" && \
|
||||
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCMVERSION/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list'; \
|
||||
elif [ "$ROCMVERSION" = "6.0.1" ] && [ "$compiler_version" = "rc1" ]; then \
|
||||
sh -c "wget http://artifactory-cdn.amd.com/artifactory/list/amdgpu-deb/amdgpu-install-internal_6.0-20.04-1_all.deb --no-check-certificate" && \
|
||||
apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install dialog && DEBIAN_FRONTEND=noninteractive apt-get install ./amdgpu-install-internal_6.0-20.04-1_all.deb && \
|
||||
sh -c 'echo deb [arch=amd64 trusted=yes] http://compute-artifactory.amd.com/artifactory/list/rocm-release-archive-20.04-deb/ 6.0.1 rel-95 > /etc/apt/sources.list.d/rocm-build.list' && \
|
||||
amdgpu-repo --amdgpu-build=1704947; \
|
||||
elif [ "$ROCMVERSION" = "6.1" ] && [ "$compiler_version" = "rc2" ]; then \
|
||||
sh -c "wget http://artifactory-cdn.amd.com/artifactory/list/amdgpu-deb/amdgpu-install-internal_6.1-20.04-1_all.deb --no-check-certificate" && \
|
||||
apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install dialog && DEBIAN_FRONTEND=noninteractive apt-get install ./amdgpu-install-internal_6.1-20.04-1_all.deb && \
|
||||
sh -c 'echo deb [arch=amd64 trusted=yes] http://compute-artifactory.amd.com/artifactory/list/rocm-release-archive-20.04-deb/ 6.1 rel-48 > /etc/apt/sources.list.d/rocm-build.list' && \
|
||||
amdgpu-repo --amdgpu-build=1736298; \
|
||||
fi
|
||||
|
||||
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
|
||||
@@ -41,6 +41,7 @@ chmod +x ${SCCACHE_INSTALL_LOCATION}/sccache
|
||||
ENV PATH=$PATH:${SCCACHE_INSTALL_LOCATION}
|
||||
|
||||
# Install dependencies
|
||||
# hipTensor requires rocm-llvm-dev for rocm versions > 6.0.1
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
|
||||
build-essential \
|
||||
cmake \
|
||||
@@ -60,6 +61,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
|
||||
python3-dev \
|
||||
python3-pip \
|
||||
redis \
|
||||
rocm-llvm-dev \
|
||||
sshpass \
|
||||
stunnel \
|
||||
software-properties-common \
|
||||
@@ -73,6 +75,9 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Update the cmake to version 3.27.5
|
||||
RUN pip install --upgrade cmake==3.27.5
|
||||
|
||||
#Install latest ccache
|
||||
RUN git clone https://github.com/ccache/ccache.git && \
|
||||
cd ccache && mkdir build && cd build && cmake .. && make install
|
||||
@@ -82,8 +87,6 @@ RUN wget -qO /usr/local/bin/ninja.gz https://github.com/ninja-build/ninja/releas
|
||||
RUN gunzip /usr/local/bin/ninja.gz
|
||||
RUN chmod a+x /usr/local/bin/ninja
|
||||
RUN git clone https://github.com/nico/ninjatracing.git
|
||||
# Update the cmake to the latest version
|
||||
RUN pip install --upgrade cmake==3.27.5
|
||||
|
||||
#Install latest cppcheck
|
||||
RUN git clone https://github.com/danmar/cppcheck.git && \
|
||||
|
||||
47
Jenkinsfile
vendored
47
Jenkinsfile
vendored
@@ -38,7 +38,7 @@ def getDockerImageName(){
|
||||
img = "${params.USE_CUSTOM_DOCKER}"
|
||||
}
|
||||
else{
|
||||
if (params.ROCMVERSION != "6.0.1"){
|
||||
if (params.ROCMVERSION != "6.1"){
|
||||
if (params.COMPILER_VERSION == "") {
|
||||
img = "${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}"
|
||||
}
|
||||
@@ -117,7 +117,9 @@ def getDockerImage(Map conf=[:]){
|
||||
{
|
||||
echo "Pulling down image: ${image}"
|
||||
retimage = docker.image("${image}")
|
||||
retimage.pull()
|
||||
withDockerRegistry([ credentialsId: "docker_test_cred", url: "" ]) {
|
||||
retimage.pull()
|
||||
}
|
||||
}
|
||||
catch(Exception ex)
|
||||
{
|
||||
@@ -406,7 +408,7 @@ def runCKProfiler(Map conf=[:]){
|
||||
|
||||
dir("script"){
|
||||
if (params.RUN_FULL_QA){
|
||||
sh "./run_full_performance_tests.sh 1 QA_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
|
||||
sh "./run_full_performance_tests.sh 0 QA_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
|
||||
archiveArtifacts "perf_gemm.log"
|
||||
archiveArtifacts "perf_resnet50_N256.log"
|
||||
archiveArtifacts "perf_resnet50_N4.log"
|
||||
@@ -416,9 +418,9 @@ def runCKProfiler(Map conf=[:]){
|
||||
archiveArtifacts "perf_conv_bwd_data.log"
|
||||
archiveArtifacts "perf_gemm_bilinear.log"
|
||||
archiveArtifacts "perf_reduction.log"
|
||||
archiveArtifacts "perf_splitK_gemm_verify.log"
|
||||
archiveArtifacts "perf_splitK_gemm.log"
|
||||
archiveArtifacts "perf_onnx_gemm.log"
|
||||
archiveArtifacts "perf_mixed_gemm.log"
|
||||
// stash perf files to master
|
||||
stash name: "perf_gemm.log"
|
||||
stash name: "perf_resnet50_N256.log"
|
||||
@@ -431,6 +433,7 @@ def runCKProfiler(Map conf=[:]){
|
||||
stash name: "perf_reduction.log"
|
||||
stash name: "perf_splitK_gemm.log"
|
||||
stash name: "perf_onnx_gemm.log"
|
||||
stash name: "perf_mixed_gemm.log"
|
||||
//we will process results on the master node
|
||||
}
|
||||
else{
|
||||
@@ -493,9 +496,6 @@ def Build_CK(Map conf=[:]){
|
||||
|
||||
def variant = env.STAGE_NAME
|
||||
def retimage
|
||||
def navi_node = 0
|
||||
def mi300_node = 0
|
||||
|
||||
gitStatusWrapper(credentialsId: "${env.status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') {
|
||||
try {
|
||||
(retimage, image) = getDockerImage(conf)
|
||||
@@ -508,14 +508,6 @@ def Build_CK(Map conf=[:]){
|
||||
else{
|
||||
echo "GPU is OK"
|
||||
}
|
||||
if ( runShell('grep -n "gfx1030" rocminfo.log') || runShell('grep -n "gfx1101" rocminfo.log') ){
|
||||
navi_node = 1
|
||||
echo "This is a Navi node"
|
||||
}
|
||||
if ( runShell('grep -n "gfx942" rocminfo.log') ){
|
||||
mi300_node = 1
|
||||
echo "This is MI300 node"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -526,15 +518,27 @@ def Build_CK(Map conf=[:]){
|
||||
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
|
||||
timeout(time: 24, unit: 'HOURS')
|
||||
{
|
||||
//check whether running on Navi or MI300 node
|
||||
def navi_node = 0
|
||||
def mi300_node = 0
|
||||
sh 'rocminfo | tee rocminfo.log'
|
||||
if ( runShell('grep -n "gfx1030" rocminfo.log') || runShell('grep -n "gfx1101" rocminfo.log') ){
|
||||
navi_node = 1
|
||||
echo "This is a Navi node"
|
||||
}
|
||||
if ( runShell('grep -n "gfx942" rocminfo.log') ){
|
||||
mi300_node = 1
|
||||
echo "This is MI300 node"
|
||||
}
|
||||
cmake_build(conf)
|
||||
dir("build"){
|
||||
//run tests and examples
|
||||
sh 'make -j check'
|
||||
if (navi_node == 0 ){
|
||||
if (params.RUN_PERFORMANCE_TESTS && navi_node == 0 && mi300_node == 0 ){
|
||||
//we only need the ckProfiler to run the performance tests, so we pack and stash it
|
||||
//do not stash profiler on Navi nodes
|
||||
//do not stash profiler on Navi or MI300 nodes
|
||||
sh 'tar -zcvf ckProfiler.tar.gz bin/ckProfiler'
|
||||
stash "ckProfiler.tar.gz"
|
||||
stash name: "ckProfiler.tar.gz"
|
||||
}
|
||||
if (params.RUN_FULL_QA && mi300_node == 0 ){
|
||||
// build deb packages for all MI100/200/300 targets and prepare to export
|
||||
@@ -542,7 +546,7 @@ def Build_CK(Map conf=[:]){
|
||||
archiveArtifacts artifacts: 'composablekernel-ckprofiler_*.deb'
|
||||
archiveArtifacts artifacts: 'composablekernel-tests_*.deb'
|
||||
sh 'mv composablekernel-ckprofiler_*.deb ckprofiler_0.2.0_amd64.deb'
|
||||
stash "ckprofiler_0.2.0_amd64.deb"
|
||||
stash name: "ckprofiler_0.2.0_amd64.deb"
|
||||
}
|
||||
}
|
||||
if (params.hipTensor_test && navi_node == 0 ){
|
||||
@@ -629,6 +633,7 @@ def process_results(Map conf=[:]){
|
||||
unstash "perf_reduction.log"
|
||||
unstash "perf_splitK_gemm.log"
|
||||
unstash "perf_onnx_gemm.log"
|
||||
unstash "perf_mixed_gemm.log"
|
||||
sh "./process_qa_data.sh"
|
||||
unstash "ckprofiler_0.2.0_amd64.deb"
|
||||
sh "sshpass -p ${env.ck_deb_pw} scp -o StrictHostKeyChecking=no ckprofiler_0.2.0_amd64.deb ${env.ck_deb_user}@${env.ck_deb_ip}:/var/www/html/composable_kernel/"
|
||||
@@ -716,8 +721,8 @@ pipeline {
|
||||
description: "Run the cppcheck static analysis (default: OFF)")
|
||||
booleanParam(
|
||||
name: "RUN_PERFORMANCE_TESTS",
|
||||
defaultValue: false,
|
||||
description: "Run the performance tests (default: OFF)")
|
||||
defaultValue: true,
|
||||
description: "Run the performance tests (default: ON)")
|
||||
booleanParam(
|
||||
name: "RUN_CODEGEN_TESTS",
|
||||
defaultValue: true,
|
||||
|
||||
@@ -133,11 +133,16 @@ def parse_logfile(logfile):
|
||||
if 'Best Perf' in line:
|
||||
lst=line.split()
|
||||
res.append(lst[4])
|
||||
elif 'onnx_gemm' in logfile or 'splitK_gemm' in logfile or 'mixed_gemm' in logfile:
|
||||
elif 'onnx_gemm' in logfile or 'mixed_gemm' in logfile:
|
||||
for line in open(logfile):
|
||||
if 'Best Perf' in line:
|
||||
lst=line.split()
|
||||
res.append(lst[33])
|
||||
elif 'splitK_gemm' in logfile:
|
||||
for line in open(logfile):
|
||||
if 'Best Perf' in line:
|
||||
lst=line.split()
|
||||
res.append(lst[36])
|
||||
return res
|
||||
|
||||
|
||||
@@ -231,7 +236,7 @@ def main():
|
||||
sql_hostname = '127.0.0.1'
|
||||
sql_username = os.environ["dbuser"]
|
||||
sql_password = os.environ["dbpassword"]
|
||||
sql_main_database = 'miopen_perf'
|
||||
sql_main_database = os.environ["ck_perf_db"]
|
||||
sql_port = 3306
|
||||
ssh_host = os.environ["dbsship"]
|
||||
ssh_user = os.environ["dbsshuser"]
|
||||
|
||||
@@ -121,26 +121,16 @@ print_log_header $reduction_log $env_type $branch $host_name
|
||||
./profile_reduce_no_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log
|
||||
|
||||
#run splitK_gemm tests, first correctness verification, then performance
|
||||
export splitK_gemm_ver_log="perf_splitK_gemm_verify.log"
|
||||
print_log_header $splitK_gemm_ver_log $env_type $branch $host_name
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
|
||||
export splitK_gemm_log="perf_splitK_gemm.log"
|
||||
print_log_header $splitK_gemm_log $env_type $branch $host_name
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
|
||||
|
||||
#run ONNX gemm tests
|
||||
export onnx_log="perf_onnx_gemm.log"
|
||||
|
||||
Reference in New Issue
Block a user