From 6dcca8b18cd517dbbb46220386bbca8a483e8b87 Mon Sep 17 00:00:00 2001 From: Illia Silin <98187287+illsilin@users.noreply.github.com> Date: Thu, 2 Jun 2022 16:16:59 -0700 Subject: [PATCH] Adding Resnet50 test to Performance tests (#268) * add resnet50 test to performance tests * add blanks before gpu_arch in log files * add resnet50 test with N=4 and process its results * add ROCM and HIP versions to test tables * uncomment the sql queries * fix script syntax in jenkinsfile [ROCm/composable_kernel commit: 1677cf705eb0f1f96e60d052df0e024bdf007b62] --- Jenkinsfile | 68 ++++++--- script/parse_perf_data.py | 302 +++++++++++++++++++++++++------------- script/profile_conv.sh | 104 ++++++------- 3 files changed, 292 insertions(+), 182 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index b912062e64..53b8d26636 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -212,30 +212,50 @@ def runCKProfiler(Map conf=[:]){ { cmake_build(conf) dir("script"){ - def perf_log = "perf_gemm_${gpu_arch}.log" - sh "rm -f ${perf_log}" - sh "echo Branch name: ${env.BRANCH_NAME} > ${perf_log}" - sh "./profile_gemm.sh gemm 0 0 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 1 0 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 2 0 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 3 0 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 0 1 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 1 1 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 2 1 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 3 1 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 0 2 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 1 2 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 2 2 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 3 2 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 0 3 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 1 3 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 2 3 0 1 0 5 | tee -a ${perf_log}" - sh "./profile_gemm.sh gemm 3 3 0 1 0 5 | tee -a ${perf_log}" - //results will be parsed, stored, and analyzed within the python script - //the script will return 0 if the performance criteria are met - //or return 1 if the criteria are not met - archiveArtifacts "${perf_log}" - sh "python3 parse_perf_data.py ${perf_log} " + //run gemm performance tests + def gemm_log = "perf_gemm_${gpu_arch}.log" + sh "rm -f ${gemm_log}" + sh "echo Branch name: ${env.BRANCH_NAME} > ${gemm_log}" + sh "echo Node name: ${NODE_NAME} >> ${gemm_log}" + sh "echo GPU_arch: ${gpu_arch} >> ${gemm_log}" + sh "hipcc --version | grep -e 'HIP version' >> ${gemm_log}" + sh "/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${gemm_log}" + sh "./profile_gemm.sh gemm 0 0 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 1 0 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 2 0 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 3 0 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 0 1 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 1 1 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 2 1 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 3 1 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 0 2 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 1 2 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 2 2 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 3 2 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 0 3 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 1 3 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 2 3 0 1 0 5 | tee -a ${gemm_log}" + sh "./profile_gemm.sh gemm 3 3 0 1 0 5 | tee -a ${gemm_log}" + //results will be parsed, stored, and analyzed within the python script + //the script will return 0 if the performance criteria are met + //or return 1 if the criteria are not met + archiveArtifacts "${gemm_log}" + sh "python3 parse_perf_data.py ${gemm_log} " + //run resnet50 test + def resnet_log = "perf_resnet50_${gpu_arch}.log" + sh "rm -f ${resnet_log}" + sh "echo Branch name: ${env.BRANCH_NAME} > ${resnet_log}" + sh "echo Node name: ${NODE_NAME} >> ${resnet_log}" + sh "echo GPU_arch: ${gpu_arch} >> ${resnet_log}" + sh "hipcc --version | grep -e 'HIP version' >> ${resnet_log}" + sh "/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${resnet_log}" + //first run tests with N=256 + sh "./profile_conv.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 256 | tee -a ${resnet_log}" + //then run with N=4 + sh "./profile_conv.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 4 | tee -a ${resnet_log}" + archiveArtifacts "${resnet_log}" + //the script will put the results from N=256 and N=4 runs into separate tables + sh "python3 parse_perf_data.py ${resnet_log} " } } } diff --git a/script/parse_perf_data.py b/script/parse_perf_data.py index a023a19526..1ec7ae01a7 100644 --- a/script/parse_perf_data.py +++ b/script/parse_perf_data.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -import os, io, argparse, datetime +import os, io, argparse, datetime, re import numpy as np import sqlalchemy from sqlalchemy.types import NVARCHAR, Float, Integer @@ -45,66 +45,91 @@ def main(): StrideB=[] StrideC=[] #parse results, get the Tflops value for "Best Perf" kernels + glue="" for filename in args.files: for line in open(filename): if 'Branch name' in line: lst=line.split() branch_name=lst[2] - for filename in args.files: - for line in open(filename): - if 'Best Perf' in line: + if 'Node name' in line: lst=line.split() - if len(lst)>=37: #the line is complete - tests.append(glue.join(lst[5:30])) - kernels.append(glue.join(lst[37:])) - tflops.append(lst[33]) - dtype.append(lst[5]) - alayout.append(lst[8]) - blayout.append(lst[11]) - M.append(lst[14]) - N.append(lst[17]) - K.append(lst[20]) - StrideA.append(lst[23]) - StrideB.append(lst[26]) - StrideC.append(lst[29]) - elif len(lst)<37 and len(lst)>=33: #the tflops are available - tests.append(glue.join(lst[5:30])) - kernels.append("N/A") - tflops.append(lst[33]) - dtype.append(lst[5]) - alayout.append(lst[8]) - blayout.append(lst[11]) - M.append(lst[14]) - N.append(lst[17]) - K.append(lst[20]) - StrideA.append(lst[23]) - StrideB.append(lst[26]) - StrideC.append(lst[29]) - print("warning: incomplete line:",lst) - elif len(lst)<33: #even the tflops are not available - print("Error in ckProfiler output!") - print("warning: incomplete line=",lst) - - #sort results - print("Number of tests:",len(tests)) + node_id=lst[2] + if 'GPU_arch' in line: + lst=line.split() + gpu_arch=lst[1] + if 'HIP version' in line: + lst=line.split() + hip_vers=lst[2] + if 'InstalledDir' in line: + lst=line.split() + rocm_vers=lst[1][lst[1].find('/opt/rocm-')+len('/opt/rocm-'):lst[1].rfind('/llvm/bin')] print("Branch name:",branch_name) - #sorted_tests = sorted(tests) - #print("sorted tests:",sorted_tests) - sorted_tflops = [x for _,x in sorted(zip(tests,tflops))] - #sorted_kernels = [x for _,x in sorted(zip(tests,kernels))] - test_list=list(range(1,len(tests)+1)) + print("Node name:",node_id) + print("GPU_arch:",gpu_arch) + print("ROCM_version:",rocm_vers) + print("HIP_version:",hip_vers) + + #parse gemm performance tests: + if 'gemm' in filename: + for filename in args.files: + for line in open(filename): + if 'Best Perf' in line: + lst=line.split() + if len(lst)>=37: #the line is complete + tests.append(glue.join(lst[5:30])) + kernels.append(glue.join(lst[37:])) + tflops.append(lst[33]) + dtype.append(lst[5]) + alayout.append(lst[8]) + blayout.append(lst[11]) + M.append(lst[14]) + N.append(lst[17]) + K.append(lst[20]) + StrideA.append(lst[23]) + StrideB.append(lst[26]) + StrideC.append(lst[29]) + elif len(lst)<37 and len(lst)>=33: #the tflops are available + tests.append(glue.join(lst[5:30])) + kernels.append("N/A") + tflops.append(lst[33]) + dtype.append(lst[5]) + alayout.append(lst[8]) + blayout.append(lst[11]) + M.append(lst[14]) + N.append(lst[17]) + K.append(lst[20]) + StrideA.append(lst[23]) + StrideB.append(lst[26]) + StrideC.append(lst[29]) + print("warning: incomplete line:",lst) + elif len(lst)<33: #even the tflops are not available + print("Error in ckProfiler output!") + print("warning: incomplete line=",lst) + #sort results + #sorted_tests = sorted(tests) + #print("sorted tests:",sorted_tests) + sorted_tflops = [x for _,x in sorted(zip(tests,tflops))] + #sorted_kernels = [x for _,x in sorted(zip(tests,kernels))] + test_list=list(range(1,len(tests)+1)) + + #parse resnet50 performance tests: + if 'resnet50' in filename: + for filename in args.files: + for line in open(filename): + if 'Best Perf' in line: + lst=line.split() + tflops.append(lst[4]) + + print("Number of tests:",len(tflops)) sql_hostname = '127.0.0.1' sql_username = os.environ["dbuser"] - print("sql_username=",sql_username) sql_password = os.environ["dbpassword"] sql_main_database = 'miopen_perf' sql_port = 3306 ssh_host = os.environ["dbsship"] - print("ssh_host=",ssh_host) ssh_user = os.environ["dbsshuser"] - print("ssh_user=",ssh_user) ssh_port = int(os.environ["dbsshport"]) ssh_pass = os.environ["dbsshpassword"] @@ -118,75 +143,140 @@ def main(): format(sql_username, sql_password, sql_hostname, tunnel.local_bind_port, sql_main_database)) conn = sqlEngine.connect() - #write the ck_gemm_test_params table - #only needed once the test set changes - ''' - sorted_dtypes = [x for _,x in sorted(zip(tests,dtype))] - sorted_alayout = [x for _,x in sorted(zip(tests,alayout))] - sorted_blayout = [x for _,x in sorted(zip(tests,blayout))] - sorted_M = [x for _,x in sorted(zip(tests,M))] - sorted_N = [x for _,x in sorted(zip(tests,N))] - sorted_K = [x for _,x in sorted(zip(tests,K))] - sorted_StrideA = [x for _,x in sorted(zip(tests,StrideA))] - sorted_StrideB = [x for _,x in sorted(zip(tests,StrideB))] - sorted_StrideC = [x for _,x in sorted(zip(tests,StrideC))] - ck_gemm_params=[test_list,sorted_dtypes,sorted_alayout,sorted_blayout, - sorted_M,sorted_N,sorted_K,sorted_StrideA,sorted_StrideB, - sorted_StrideC] - df=pd.DataFrame(np.transpose(ck_gemm_params),columns=['Test_number','Data_type', - 'Alayout','BLayout','M','N','K', 'StrideA','StrideB','StrideC']) - print(df) + #save gemm performance tests: + if 'gemm' in filename: - dtypes = { - 'Test_number': Integer(), - 'Data_type': NVARCHAR(length=5), - 'Alayout': NVARCHAR(length=12), - 'Blayout': NVARCHAR(length=12), - 'M': Integer(), - 'N': Integer(), - 'K': Integer(), - 'StrideA': Integer(), - 'StrideB': Integer(), - 'StrideC': Integer() - } - df.to_sql("ck_gemm_test_params",conn,if_exists='replace',index=False, dtype=dtypes) - ''' + #write the ck_gemm_test_params table + #only needed once the test set changes + ''' + sorted_dtypes = [x for _,x in sorted(zip(tests,dtype))] + sorted_alayout = [x for _,x in sorted(zip(tests,alayout))] + sorted_blayout = [x for _,x in sorted(zip(tests,blayout))] + sorted_M = [x for _,x in sorted(zip(tests,M))] + sorted_N = [x for _,x in sorted(zip(tests,N))] + sorted_K = [x for _,x in sorted(zip(tests,K))] + sorted_StrideA = [x for _,x in sorted(zip(tests,StrideA))] + sorted_StrideB = [x for _,x in sorted(zip(tests,StrideB))] + sorted_StrideC = [x for _,x in sorted(zip(tests,StrideC))] + ck_gemm_params=[test_list,sorted_dtypes,sorted_alayout,sorted_blayout, + sorted_M,sorted_N,sorted_K,sorted_StrideA,sorted_StrideB, + sorted_StrideC] + df=pd.DataFrame(np.transpose(ck_gemm_params),columns=['Test_number','Data_type', + 'Alayout','BLayout','M','N','K', 'StrideA','StrideB','StrideC']) + print(df) - #read baseline results for the latest develop branch - query = '''SELECT * from ck_gemm_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_gemm_tflops where Branch_ID='develop' );''' - tflops_base = pd.read_sql_query(query, conn) + dtypes = { + 'Test_number': Integer(), + 'Data_type': NVARCHAR(length=5), + 'Alayout': NVARCHAR(length=12), + 'Blayout': NVARCHAR(length=12), + 'M': Integer(), + 'N': Integer(), + 'K': Integer(), + 'StrideA': Integer(), + 'StrideB': Integer(), + 'StrideC': Integer() + } + df.to_sql("ck_gemm_test_params",conn,if_exists='replace',index=False, dtype=dtypes) + ''' + + #read baseline results for the latest develop branch + query = '''SELECT * from ck_gemm_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_gemm_tflops where Branch_ID='develop' );''' + tflops_base = pd.read_sql_query(query, conn) + + #write new results to the db + testlist=[] + for i in range(1,len(tests)+1): + testlist.append("Test%i"%i) + ck_gemm_tflops=[str(branch_name),str(node_id),str(gpu_arch),str(rocm_vers),str(hip_vers),str(datetime.datetime.now())] + flops=pd.DataFrame(data=[ck_gemm_tflops],columns=['Branch_ID','Node_ID','GPU_arch','ROCM_version','HIP_version','Datetime']) + df_add=pd.DataFrame(data=[sorted_tflops],columns=testlist) + flops=pd.concat([flops,df_add],axis=1) + print("new tflops for gemm tests:",flops) + flops.to_sql("ck_gemm_tflops",conn,if_exists='append',index=False) + + #save resnet50 performance tests: + if 'resnet50' in filename: + #read baseline results for the latest develop branch + query = '''SELECT * from ck_resnet50_N256_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_resnet50_N256_tflops where Branch_ID='develop' );''' + tflops_base_N256 = pd.read_sql_query(query, conn) + query = '''SELECT * from ck_resnet50_N4_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_resnet50_N4_tflops where Branch_ID='develop' );''' + tflops_base_N4 = pd.read_sql_query(query, conn) + + #write new results to the db + testlist=[] + for i in range(1,50): + testlist.append("Layer%i"%i) + ck_resnet_tflops=[str(branch_name),str(node_id),str(gpu_arch),str(rocm_vers),str(hip_vers),str(datetime.datetime.now())] + flops0=pd.DataFrame(data=[ck_resnet_tflops],columns=['Branch_ID','Node_ID','GPU_arch','ROCM_version','HIP_version','Datetime']) + df_add=pd.DataFrame(data=[tflops[0:49]],columns=testlist) + flops=pd.concat([flops0,df_add],axis=1) + print("new tflops for N=256 resnet50 test:",flops) + flops.to_sql("ck_resnet50_N256_tflops",conn,if_exists='append',index=False) + df_add=pd.DataFrame(data=[tflops[49:98]],columns=testlist) + flops=pd.concat([flops0,df_add],axis=1) + print("new tflops for N=4 resnet50 test:",flops) + flops.to_sql("ck_resnet50_N4_tflops",conn,if_exists='append',index=False) - #write new results to the db - testlist=[] - for i in range(1,len(tests)+1): - testlist.append("Test%i"%i) - ck_gemm_tflops=[str(branch_name),str(datetime.datetime.now())] - flops=pd.DataFrame(data=[ck_gemm_tflops],columns=['Branch_ID','Datetime']) - df_add=pd.DataFrame(data=[sorted_tflops],columns=testlist) - flops=pd.concat([flops,df_add],axis=1) - print("new tflops results:",flops) - flops.to_sql("ck_gemm_tflops",conn,if_exists='append',index=False) conn.close() - #compare the results to the baseline + #compare the results to the baseline if baseline exists regression=0 - base=tflops_base[testlist].to_numpy(dtype='float') - base_list=base[0] - ave_perf=0 - for i in range(len(base_list)): - # success criterion: - if base_list[i]>1.01*float(sorted_tflops[i]): - print("test # ",i,"shows regression by {:.3f}%".format( - (float(sorted_tflops[i])-base_list[i])/base_list[i]*100)) - regression=1 - ave_perf=ave_perf+float(sorted_tflops[i])/base_list[i] - if regression==0: - print("no regressions found") - ave_perf=ave_perf/len(base_list) - print("average performance relative to baseline:",ave_perf) + if 'gemm' in filename: + if not tflops_base.empty: + base=tflops_base[testlist].to_numpy(dtype='float') + base_list=base[0] + ave_perf=0 + for i in range(len(base_list)): + # success criterion: + if base_list[i]>1.01*float(sorted_tflops[i]): + print("test # ",i,"shows regression by {:.3f}%".format( + (float(sorted_tflops[i])-base_list[i])/base_list[i]*100)) + regression=1 + ave_perf=ave_perf+float(sorted_tflops[i])/base_list[i] + if regression==0: + print("no regressions found") + ave_perf=ave_perf/len(base_list) + print("average performance relative to baseline:",ave_perf) + else: + print("could not find a baseline") + if 'resnet50' in filename: + if not tflops_base_N256.empty: + base=tflops_base_N256[testlist].to_numpy(dtype='float') + base_list=base[0] + ave_perf=0 + for i in range(len(base_list)): + # success criterion: + if base_list[i]>1.01*float(tflops[i]): + print("layer # ",i,"shows regression by {:.3f}%".format( + (float(tflops[i])-base_list[i])/base_list[i]*100)) + regression=1 + ave_perf=ave_perf+float(tflops[i])/base_list[i] + if regression==0: + print("no regressions found") + ave_perf=ave_perf/len(base_list) + print("average performance relative to baseline:",ave_perf) + else: + print("could not find a baseline for N=256") + if not tflops_base_N4.empty: + base=tflops_base_N4[testlist].to_numpy(dtype='float') + base_list=base[0] + ave_perf=0 + for i in range(len(base_list)): + # success criterion: + if base_list[i]>1.01*float(tflops[i+49]): + print("layer # ",i,"shows regression by {:.3f}%".format( + (float(tflops[i+49])-base_list[i])/base_list[i]*100)) + regression=1 + ave_perf=ave_perf+float(tflops[i+49])/base_list[i] + if regression==0: + print("no regressions found") + ave_perf=ave_perf/len(base_list) + print("average performance relative to baseline:",ave_perf) + else: + print("could not find a baseline for N=4") #return 0 if performance criteria met, otherwise return 1 - return regression if __name__ == '__main__': diff --git a/script/profile_conv.sh b/script/profile_conv.sh index f3a6d2c70c..0e97ceb6c6 100755 --- a/script/profile_conv.sh +++ b/script/profile_conv.sh @@ -3,9 +3,9 @@ ## GPU visibility export HIP_VISIBLE_DEVICES=0 - make -j ckProfiler +# make -j ckProfiler - DRIVER="./profiler/ckProfiler" + DRIVER="../build/bin/ckProfiler" OP=$1 DATATYPE=$2 @@ -51,56 +51,56 @@ REPEAT=$9 # Resnet50 from Bing -#################### op____________________ datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 -#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 -#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 +####### op_________________ datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C_ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 +$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 +$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 # Resnet50