mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-05 06:01:23 +00:00
Adding Resnet50 test to Performance tests (#268)
* add resnet50 test to performance tests
* add blanks before gpu_arch in log files
* add resnet50 test with N=4 and process its results
* add ROCM and HIP versions to test tables
* uncomment the sql queries
* fix script syntax in jenkinsfile
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
import os, io, argparse, datetime
|
||||
import os, io, argparse, datetime, re
|
||||
import numpy as np
|
||||
import sqlalchemy
|
||||
from sqlalchemy.types import NVARCHAR, Float, Integer
|
||||
@@ -45,66 +45,91 @@ def main():
|
||||
StrideB=[]
|
||||
StrideC=[]
|
||||
#parse results, get the Tflops value for "Best Perf" kernels
|
||||
|
||||
glue=""
|
||||
for filename in args.files:
|
||||
for line in open(filename):
|
||||
if 'Branch name' in line:
|
||||
lst=line.split()
|
||||
branch_name=lst[2]
|
||||
for filename in args.files:
|
||||
for line in open(filename):
|
||||
if 'Best Perf' in line:
|
||||
if 'Node name' in line:
|
||||
lst=line.split()
|
||||
if len(lst)>=37: #the line is complete
|
||||
tests.append(glue.join(lst[5:30]))
|
||||
kernels.append(glue.join(lst[37:]))
|
||||
tflops.append(lst[33])
|
||||
dtype.append(lst[5])
|
||||
alayout.append(lst[8])
|
||||
blayout.append(lst[11])
|
||||
M.append(lst[14])
|
||||
N.append(lst[17])
|
||||
K.append(lst[20])
|
||||
StrideA.append(lst[23])
|
||||
StrideB.append(lst[26])
|
||||
StrideC.append(lst[29])
|
||||
elif len(lst)<37 and len(lst)>=33: #the tflops are available
|
||||
tests.append(glue.join(lst[5:30]))
|
||||
kernels.append("N/A")
|
||||
tflops.append(lst[33])
|
||||
dtype.append(lst[5])
|
||||
alayout.append(lst[8])
|
||||
blayout.append(lst[11])
|
||||
M.append(lst[14])
|
||||
N.append(lst[17])
|
||||
K.append(lst[20])
|
||||
StrideA.append(lst[23])
|
||||
StrideB.append(lst[26])
|
||||
StrideC.append(lst[29])
|
||||
print("warning: incomplete line:",lst)
|
||||
elif len(lst)<33: #even the tflops are not available
|
||||
print("Error in ckProfiler output!")
|
||||
print("warning: incomplete line=",lst)
|
||||
|
||||
#sort results
|
||||
print("Number of tests:",len(tests))
|
||||
node_id=lst[2]
|
||||
if 'GPU_arch' in line:
|
||||
lst=line.split()
|
||||
gpu_arch=lst[1]
|
||||
if 'HIP version' in line:
|
||||
lst=line.split()
|
||||
hip_vers=lst[2]
|
||||
if 'InstalledDir' in line:
|
||||
lst=line.split()
|
||||
rocm_vers=lst[1][lst[1].find('/opt/rocm-')+len('/opt/rocm-'):lst[1].rfind('/llvm/bin')]
|
||||
print("Branch name:",branch_name)
|
||||
#sorted_tests = sorted(tests)
|
||||
#print("sorted tests:",sorted_tests)
|
||||
sorted_tflops = [x for _,x in sorted(zip(tests,tflops))]
|
||||
#sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
|
||||
test_list=list(range(1,len(tests)+1))
|
||||
print("Node name:",node_id)
|
||||
print("GPU_arch:",gpu_arch)
|
||||
print("ROCM_version:",rocm_vers)
|
||||
print("HIP_version:",hip_vers)
|
||||
|
||||
|
||||
#parse gemm performance tests:
|
||||
if 'gemm' in filename:
|
||||
for filename in args.files:
|
||||
for line in open(filename):
|
||||
if 'Best Perf' in line:
|
||||
lst=line.split()
|
||||
if len(lst)>=37: #the line is complete
|
||||
tests.append(glue.join(lst[5:30]))
|
||||
kernels.append(glue.join(lst[37:]))
|
||||
tflops.append(lst[33])
|
||||
dtype.append(lst[5])
|
||||
alayout.append(lst[8])
|
||||
blayout.append(lst[11])
|
||||
M.append(lst[14])
|
||||
N.append(lst[17])
|
||||
K.append(lst[20])
|
||||
StrideA.append(lst[23])
|
||||
StrideB.append(lst[26])
|
||||
StrideC.append(lst[29])
|
||||
elif len(lst)<37 and len(lst)>=33: #the tflops are available
|
||||
tests.append(glue.join(lst[5:30]))
|
||||
kernels.append("N/A")
|
||||
tflops.append(lst[33])
|
||||
dtype.append(lst[5])
|
||||
alayout.append(lst[8])
|
||||
blayout.append(lst[11])
|
||||
M.append(lst[14])
|
||||
N.append(lst[17])
|
||||
K.append(lst[20])
|
||||
StrideA.append(lst[23])
|
||||
StrideB.append(lst[26])
|
||||
StrideC.append(lst[29])
|
||||
print("warning: incomplete line:",lst)
|
||||
elif len(lst)<33: #even the tflops are not available
|
||||
print("Error in ckProfiler output!")
|
||||
print("warning: incomplete line=",lst)
|
||||
#sort results
|
||||
#sorted_tests = sorted(tests)
|
||||
#print("sorted tests:",sorted_tests)
|
||||
sorted_tflops = [x for _,x in sorted(zip(tests,tflops))]
|
||||
#sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
|
||||
test_list=list(range(1,len(tests)+1))
|
||||
|
||||
#parse resnet50 performance tests:
|
||||
if 'resnet50' in filename:
|
||||
for filename in args.files:
|
||||
for line in open(filename):
|
||||
if 'Best Perf' in line:
|
||||
lst=line.split()
|
||||
tflops.append(lst[4])
|
||||
|
||||
print("Number of tests:",len(tflops))
|
||||
sql_hostname = '127.0.0.1'
|
||||
sql_username = os.environ["dbuser"]
|
||||
print("sql_username=",sql_username)
|
||||
sql_password = os.environ["dbpassword"]
|
||||
sql_main_database = 'miopen_perf'
|
||||
sql_port = 3306
|
||||
ssh_host = os.environ["dbsship"]
|
||||
print("ssh_host=",ssh_host)
|
||||
ssh_user = os.environ["dbsshuser"]
|
||||
print("ssh_user=",ssh_user)
|
||||
ssh_port = int(os.environ["dbsshport"])
|
||||
ssh_pass = os.environ["dbsshpassword"]
|
||||
|
||||
@@ -118,75 +143,140 @@ def main():
|
||||
format(sql_username, sql_password, sql_hostname, tunnel.local_bind_port, sql_main_database))
|
||||
conn = sqlEngine.connect()
|
||||
|
||||
#write the ck_gemm_test_params table
|
||||
#only needed once the test set changes
|
||||
'''
|
||||
sorted_dtypes = [x for _,x in sorted(zip(tests,dtype))]
|
||||
sorted_alayout = [x for _,x in sorted(zip(tests,alayout))]
|
||||
sorted_blayout = [x for _,x in sorted(zip(tests,blayout))]
|
||||
sorted_M = [x for _,x in sorted(zip(tests,M))]
|
||||
sorted_N = [x for _,x in sorted(zip(tests,N))]
|
||||
sorted_K = [x for _,x in sorted(zip(tests,K))]
|
||||
sorted_StrideA = [x for _,x in sorted(zip(tests,StrideA))]
|
||||
sorted_StrideB = [x for _,x in sorted(zip(tests,StrideB))]
|
||||
sorted_StrideC = [x for _,x in sorted(zip(tests,StrideC))]
|
||||
ck_gemm_params=[test_list,sorted_dtypes,sorted_alayout,sorted_blayout,
|
||||
sorted_M,sorted_N,sorted_K,sorted_StrideA,sorted_StrideB,
|
||||
sorted_StrideC]
|
||||
df=pd.DataFrame(np.transpose(ck_gemm_params),columns=['Test_number','Data_type',
|
||||
'Alayout','BLayout','M','N','K', 'StrideA','StrideB','StrideC'])
|
||||
print(df)
|
||||
#save gemm performance tests:
|
||||
if 'gemm' in filename:
|
||||
|
||||
dtypes = {
|
||||
'Test_number': Integer(),
|
||||
'Data_type': NVARCHAR(length=5),
|
||||
'Alayout': NVARCHAR(length=12),
|
||||
'Blayout': NVARCHAR(length=12),
|
||||
'M': Integer(),
|
||||
'N': Integer(),
|
||||
'K': Integer(),
|
||||
'StrideA': Integer(),
|
||||
'StrideB': Integer(),
|
||||
'StrideC': Integer()
|
||||
}
|
||||
df.to_sql("ck_gemm_test_params",conn,if_exists='replace',index=False, dtype=dtypes)
|
||||
'''
|
||||
#write the ck_gemm_test_params table
|
||||
#only needed once the test set changes
|
||||
'''
|
||||
sorted_dtypes = [x for _,x in sorted(zip(tests,dtype))]
|
||||
sorted_alayout = [x for _,x in sorted(zip(tests,alayout))]
|
||||
sorted_blayout = [x for _,x in sorted(zip(tests,blayout))]
|
||||
sorted_M = [x for _,x in sorted(zip(tests,M))]
|
||||
sorted_N = [x for _,x in sorted(zip(tests,N))]
|
||||
sorted_K = [x for _,x in sorted(zip(tests,K))]
|
||||
sorted_StrideA = [x for _,x in sorted(zip(tests,StrideA))]
|
||||
sorted_StrideB = [x for _,x in sorted(zip(tests,StrideB))]
|
||||
sorted_StrideC = [x for _,x in sorted(zip(tests,StrideC))]
|
||||
ck_gemm_params=[test_list,sorted_dtypes,sorted_alayout,sorted_blayout,
|
||||
sorted_M,sorted_N,sorted_K,sorted_StrideA,sorted_StrideB,
|
||||
sorted_StrideC]
|
||||
df=pd.DataFrame(np.transpose(ck_gemm_params),columns=['Test_number','Data_type',
|
||||
'Alayout','BLayout','M','N','K', 'StrideA','StrideB','StrideC'])
|
||||
print(df)
|
||||
|
||||
#read baseline results for the latest develop branch
|
||||
query = '''SELECT * from ck_gemm_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_gemm_tflops where Branch_ID='develop' );'''
|
||||
tflops_base = pd.read_sql_query(query, conn)
|
||||
dtypes = {
|
||||
'Test_number': Integer(),
|
||||
'Data_type': NVARCHAR(length=5),
|
||||
'Alayout': NVARCHAR(length=12),
|
||||
'Blayout': NVARCHAR(length=12),
|
||||
'M': Integer(),
|
||||
'N': Integer(),
|
||||
'K': Integer(),
|
||||
'StrideA': Integer(),
|
||||
'StrideB': Integer(),
|
||||
'StrideC': Integer()
|
||||
}
|
||||
df.to_sql("ck_gemm_test_params",conn,if_exists='replace',index=False, dtype=dtypes)
|
||||
'''
|
||||
|
||||
#read baseline results for the latest develop branch
|
||||
query = '''SELECT * from ck_gemm_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_gemm_tflops where Branch_ID='develop' );'''
|
||||
tflops_base = pd.read_sql_query(query, conn)
|
||||
|
||||
#write new results to the db
|
||||
testlist=[]
|
||||
for i in range(1,len(tests)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
ck_gemm_tflops=[str(branch_name),str(node_id),str(gpu_arch),str(rocm_vers),str(hip_vers),str(datetime.datetime.now())]
|
||||
flops=pd.DataFrame(data=[ck_gemm_tflops],columns=['Branch_ID','Node_ID','GPU_arch','ROCM_version','HIP_version','Datetime'])
|
||||
df_add=pd.DataFrame(data=[sorted_tflops],columns=testlist)
|
||||
flops=pd.concat([flops,df_add],axis=1)
|
||||
print("new tflops for gemm tests:",flops)
|
||||
flops.to_sql("ck_gemm_tflops",conn,if_exists='append',index=False)
|
||||
|
||||
#save resnet50 performance tests:
|
||||
if 'resnet50' in filename:
|
||||
#read baseline results for the latest develop branch
|
||||
query = '''SELECT * from ck_resnet50_N256_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_resnet50_N256_tflops where Branch_ID='develop' );'''
|
||||
tflops_base_N256 = pd.read_sql_query(query, conn)
|
||||
query = '''SELECT * from ck_resnet50_N4_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_resnet50_N4_tflops where Branch_ID='develop' );'''
|
||||
tflops_base_N4 = pd.read_sql_query(query, conn)
|
||||
|
||||
#write new results to the db
|
||||
testlist=[]
|
||||
for i in range(1,50):
|
||||
testlist.append("Layer%i"%i)
|
||||
ck_resnet_tflops=[str(branch_name),str(node_id),str(gpu_arch),str(rocm_vers),str(hip_vers),str(datetime.datetime.now())]
|
||||
flops0=pd.DataFrame(data=[ck_resnet_tflops],columns=['Branch_ID','Node_ID','GPU_arch','ROCM_version','HIP_version','Datetime'])
|
||||
df_add=pd.DataFrame(data=[tflops[0:49]],columns=testlist)
|
||||
flops=pd.concat([flops0,df_add],axis=1)
|
||||
print("new tflops for N=256 resnet50 test:",flops)
|
||||
flops.to_sql("ck_resnet50_N256_tflops",conn,if_exists='append',index=False)
|
||||
df_add=pd.DataFrame(data=[tflops[49:98]],columns=testlist)
|
||||
flops=pd.concat([flops0,df_add],axis=1)
|
||||
print("new tflops for N=4 resnet50 test:",flops)
|
||||
flops.to_sql("ck_resnet50_N4_tflops",conn,if_exists='append',index=False)
|
||||
|
||||
#write new results to the db
|
||||
testlist=[]
|
||||
for i in range(1,len(tests)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
ck_gemm_tflops=[str(branch_name),str(datetime.datetime.now())]
|
||||
flops=pd.DataFrame(data=[ck_gemm_tflops],columns=['Branch_ID','Datetime'])
|
||||
df_add=pd.DataFrame(data=[sorted_tflops],columns=testlist)
|
||||
flops=pd.concat([flops,df_add],axis=1)
|
||||
print("new tflops results:",flops)
|
||||
flops.to_sql("ck_gemm_tflops",conn,if_exists='append',index=False)
|
||||
conn.close()
|
||||
|
||||
#compare the results to the baseline
|
||||
#compare the results to the baseline if baseline exists
|
||||
regression=0
|
||||
base=tflops_base[testlist].to_numpy(dtype='float')
|
||||
base_list=base[0]
|
||||
ave_perf=0
|
||||
for i in range(len(base_list)):
|
||||
# success criterion:
|
||||
if base_list[i]>1.01*float(sorted_tflops[i]):
|
||||
print("test # ",i,"shows regression by {:.3f}%".format(
|
||||
(float(sorted_tflops[i])-base_list[i])/base_list[i]*100))
|
||||
regression=1
|
||||
ave_perf=ave_perf+float(sorted_tflops[i])/base_list[i]
|
||||
if regression==0:
|
||||
print("no regressions found")
|
||||
ave_perf=ave_perf/len(base_list)
|
||||
print("average performance relative to baseline:",ave_perf)
|
||||
if 'gemm' in filename:
|
||||
if not tflops_base.empty:
|
||||
base=tflops_base[testlist].to_numpy(dtype='float')
|
||||
base_list=base[0]
|
||||
ave_perf=0
|
||||
for i in range(len(base_list)):
|
||||
# success criterion:
|
||||
if base_list[i]>1.01*float(sorted_tflops[i]):
|
||||
print("test # ",i,"shows regression by {:.3f}%".format(
|
||||
(float(sorted_tflops[i])-base_list[i])/base_list[i]*100))
|
||||
regression=1
|
||||
ave_perf=ave_perf+float(sorted_tflops[i])/base_list[i]
|
||||
if regression==0:
|
||||
print("no regressions found")
|
||||
ave_perf=ave_perf/len(base_list)
|
||||
print("average performance relative to baseline:",ave_perf)
|
||||
else:
|
||||
print("could not find a baseline")
|
||||
if 'resnet50' in filename:
|
||||
if not tflops_base_N256.empty:
|
||||
base=tflops_base_N256[testlist].to_numpy(dtype='float')
|
||||
base_list=base[0]
|
||||
ave_perf=0
|
||||
for i in range(len(base_list)):
|
||||
# success criterion:
|
||||
if base_list[i]>1.01*float(tflops[i]):
|
||||
print("layer # ",i,"shows regression by {:.3f}%".format(
|
||||
(float(tflops[i])-base_list[i])/base_list[i]*100))
|
||||
regression=1
|
||||
ave_perf=ave_perf+float(tflops[i])/base_list[i]
|
||||
if regression==0:
|
||||
print("no regressions found")
|
||||
ave_perf=ave_perf/len(base_list)
|
||||
print("average performance relative to baseline:",ave_perf)
|
||||
else:
|
||||
print("could not find a baseline for N=256")
|
||||
if not tflops_base_N4.empty:
|
||||
base=tflops_base_N4[testlist].to_numpy(dtype='float')
|
||||
base_list=base[0]
|
||||
ave_perf=0
|
||||
for i in range(len(base_list)):
|
||||
# success criterion:
|
||||
if base_list[i]>1.01*float(tflops[i+49]):
|
||||
print("layer # ",i,"shows regression by {:.3f}%".format(
|
||||
(float(tflops[i+49])-base_list[i])/base_list[i]*100))
|
||||
regression=1
|
||||
ave_perf=ave_perf+float(tflops[i+49])/base_list[i]
|
||||
if regression==0:
|
||||
print("no regressions found")
|
||||
ave_perf=ave_perf/len(base_list)
|
||||
print("average performance relative to baseline:",ave_perf)
|
||||
else:
|
||||
print("could not find a baseline for N=4")
|
||||
|
||||
#return 0 if performance criteria met, otherwise return 1
|
||||
|
||||
return regression
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user