mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
Add switch between compilers, make 9110 compiler default, add full QA scripts. (#322)
* adding scripts for full perf test suite
* uncomment the sql queries
* fix typo and chmod a+x for scripts
* dos2unix for all new scripts
* disable verification in full performance test
* fix reduction scripts, add gfrouped_gemm hotfix
* fix the grouped_gemm hotfix and only run reduction for fp16
* change compiler flag syntax
* fix syntax
* add predefinition of dockerArgs
* avoid redefinitions of dockerArgs
* add blank space at the end of dockerArgs
* try to build with release compiler
* adding spaces inside if condition
* limit the number of threads for building 9110 compiler
* change the way HIP_CLANG_PATH is set
* remove the export command
* change the conditional ENV syntax
* set HIP_CLANG_PATH at docker run time
* update scripts for full qa
* enable the sql write query
* fix typo
* remove a comment from a script
[ROCm/composable_kernel commit: 39acaea36d]
This commit is contained in:
296
script/process_perf_data.py
Normal file
296
script/process_perf_data.py
Normal file
@@ -0,0 +1,296 @@
|
||||
#!/usr/bin/env python3
|
||||
import os, io, argparse, datetime
|
||||
#import numpy as np
|
||||
import sqlalchemy
|
||||
from sqlalchemy.types import NVARCHAR, Float, Integer
|
||||
import pymysql
|
||||
import pandas as pd
|
||||
from sshtunnel import SSHTunnelForwarder
|
||||
|
||||
def print_to_string(*args, **kwargs):
|
||||
output = io.StringIO()
|
||||
print(*args, file=output, **kwargs)
|
||||
contents = output.getvalue()
|
||||
output.close()
|
||||
return contents
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='Parse results from tf benchmark runs')
|
||||
parser.add_argument('filename', type=str, help='Log file to prase or directory containing log files')
|
||||
args = parser.parse_args()
|
||||
files = []
|
||||
if os.path.isdir(args.filename):
|
||||
all_files = os.listdir(args.filename)
|
||||
for name in all_files:
|
||||
if not 'log' in name:
|
||||
continue
|
||||
files.append(os.path.join(args.filename, name))
|
||||
else:
|
||||
files = [args.filename]
|
||||
args.files = files
|
||||
return args
|
||||
|
||||
def get_log_params(logfile):
|
||||
print("logfile=",logfile)
|
||||
branch_name=' '
|
||||
node_id=' '
|
||||
gpu_arch=' '
|
||||
hip_vers=' '
|
||||
compute_units=0
|
||||
environment=' '
|
||||
rocm_vers=' '
|
||||
for line in open(logfile):
|
||||
if 'Branch name' in line:
|
||||
lst=line.split()
|
||||
branch_name=lst[2]
|
||||
if 'On branch' in line:
|
||||
lst=line.split()
|
||||
branch_name=lst[2]
|
||||
if 'Node name' in line:
|
||||
lst=line.split()
|
||||
node_id=lst[2]
|
||||
if 'GPU_arch' in line:
|
||||
lst=line.split()
|
||||
gpu_arch=lst[2]
|
||||
if 'HIP version' in line:
|
||||
lst=line.split()
|
||||
hip_vers=lst[2]
|
||||
if 'Compute Unit' in line:
|
||||
lst=line.split()
|
||||
compute_units=lst[2]
|
||||
if 'Environment type' in line:
|
||||
lst=line.split()
|
||||
environment=lst[2]
|
||||
if 'InstalledDir' in line:
|
||||
lst=line.split()
|
||||
rocm_vers=lst[1][lst[1].find('/opt/rocm-')+len('/opt/rocm-'):lst[1].rfind('/llvm/bin')]
|
||||
return branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment
|
||||
|
||||
def parse_logfile(logfile):
|
||||
glue=''
|
||||
res=[]
|
||||
tests=[]
|
||||
kernels=[]
|
||||
tflops=[]
|
||||
dtype=[]
|
||||
alayout=[]
|
||||
blayout=[]
|
||||
M=[]
|
||||
N=[]
|
||||
K=[]
|
||||
StrideA=[]
|
||||
StrideB=[]
|
||||
StrideC=[]
|
||||
if 'perf_gemm' in logfile:
|
||||
for line in open(logfile):
|
||||
if 'Best Perf' in line:
|
||||
lst=line.split()
|
||||
print("len(lst)=",len(lst),"lst:",lst)
|
||||
if len(lst)>=37: #the line is complete
|
||||
tests.append(glue.join(lst[5:30]))
|
||||
kernels.append(glue.join(lst[37:]))
|
||||
tflops.append(lst[33])
|
||||
dtype.append(lst[5])
|
||||
alayout.append(lst[8])
|
||||
blayout.append(lst[11])
|
||||
M.append(lst[14])
|
||||
N.append(lst[17])
|
||||
K.append(lst[20])
|
||||
StrideA.append(lst[23])
|
||||
StrideB.append(lst[26])
|
||||
StrideC.append(lst[29])
|
||||
elif len(lst)<37 and len(lst)>=33: #the tflops are available
|
||||
tests.append(glue.join(lst[5:30]))
|
||||
kernels.append("N/A")
|
||||
tflops.append(lst[33])
|
||||
dtype.append(lst[5])
|
||||
alayout.append(lst[8])
|
||||
blayout.append(lst[11])
|
||||
M.append(lst[14])
|
||||
N.append(lst[17])
|
||||
K.append(lst[20])
|
||||
StrideA.append(lst[23])
|
||||
StrideB.append(lst[26])
|
||||
StrideC.append(lst[29])
|
||||
print("warning: incomplete line:",lst)
|
||||
elif len(lst)<33: #even the tflops are not available
|
||||
print("Error in ckProfiler output!")
|
||||
print("warning: incomplete line=",lst)
|
||||
#sort results
|
||||
#sorted_tests = sorted(tests)
|
||||
res = [x for _,x in sorted(zip(tests,tflops))]
|
||||
#sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
|
||||
test_list=list(range(1,len(tests)+1))
|
||||
#parse fwd_conv performance tests:
|
||||
elif 'fwd_conv' in logfile:
|
||||
for line in open(logfile):
|
||||
if 'tflops:' in line:
|
||||
lst=line.split()
|
||||
res.append(lst[1])
|
||||
#parse all other performance tests:
|
||||
elif 'resnet50' or 'batched_gemm' or 'grouped_gemm' or 'bwd_conv' or 'fusion' or 'reduction' in logfile:
|
||||
for line in open(logfile):
|
||||
if 'Best Perf' in line:
|
||||
lst=line.split()
|
||||
res.append(lst[4])
|
||||
return res
|
||||
|
||||
|
||||
def get_baseline(table, connection):
|
||||
query = '''SELECT * from '''+table+''' WHERE Datetime = (SELECT MAX(Datetime) FROM '''+table+''' where Branch_ID='develop' );'''
|
||||
return pd.read_sql_query(query, connection)
|
||||
|
||||
def store_new_test_result(table_name, test_results, testlist, branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment, connection):
|
||||
params=[str(branch_name),str(node_id),str(gpu_arch),compute_units,str(rocm_vers),str(hip_vers),str(environment),str(datetime.datetime.now())]
|
||||
df=pd.DataFrame(data=[params],columns=['Branch_ID','Node_ID','GPU_arch','Compute Units','ROCM_version','HIP_version','Environment','Datetime'])
|
||||
df_add=pd.DataFrame(data=[test_results],columns=testlist)
|
||||
df=pd.concat([df,df_add],axis=1)
|
||||
print("new test results dataframe:",df)
|
||||
df.to_sql(table_name,connection,if_exists='append',index=False)
|
||||
return 0
|
||||
|
||||
def compare_test_to_baseline(baseline,test,testlist):
|
||||
regression=0
|
||||
if not baseline.empty:
|
||||
base=baseline[testlist].to_numpy(dtype='float')
|
||||
base_list=base[0]
|
||||
ave_perf=0
|
||||
for i in range(len(base_list)):
|
||||
# success criterion:
|
||||
if base_list[i]>1.01*float(test[i]):
|
||||
print("test # ",i,"shows regression by {:.3f}%".format(
|
||||
(float(test[i])-base_list[i])/base_list[i]*100))
|
||||
regression=1
|
||||
ave_perf=ave_perf+float(test[i])/base_list[i]
|
||||
if regression==0:
|
||||
print("no regressions found")
|
||||
ave_perf=ave_perf/len(base_list)
|
||||
print("average performance relative to baseline:",ave_perf)
|
||||
else:
|
||||
print("could not find a baseline")
|
||||
return regression
|
||||
|
||||
'''
|
||||
def post_test_params(tlist,connection):
|
||||
sorted_dtypes = [x for _,x in sorted(zip(tests,dtype))]
|
||||
sorted_alayout = [x for _,x in sorted(zip(tests,alayout))]
|
||||
sorted_blayout = [x for _,x in sorted(zip(tests,blayout))]
|
||||
sorted_M = [x for _,x in sorted(zip(tests,M))]
|
||||
sorted_N = [x for _,x in sorted(zip(tests,N))]
|
||||
sorted_K = [x for _,x in sorted(zip(tests,K))]
|
||||
sorted_StrideA = [x for _,x in sorted(zip(tests,StrideA))]
|
||||
sorted_StrideB = [x for _,x in sorted(zip(tests,StrideB))]
|
||||
sorted_StrideC = [x for _,x in sorted(zip(tests,StrideC))]
|
||||
ck_gemm_params=[tlist,sorted_dtypes,sorted_alayout,sorted_blayout,
|
||||
sorted_M,sorted_N,sorted_K,sorted_StrideA,sorted_StrideB,
|
||||
sorted_StrideC]
|
||||
df=pd.DataFrame(np.transpose(ck_gemm_params),columns=['Test_number','Data_type',
|
||||
'Alayout','BLayout','M','N','K', 'StrideA','StrideB','StrideC'])
|
||||
print(df)
|
||||
|
||||
dtypes = {
|
||||
'Test_number': Integer(),
|
||||
'Data_type': NVARCHAR(length=5),
|
||||
'Alayout': NVARCHAR(length=12),
|
||||
'Blayout': NVARCHAR(length=12),
|
||||
'M': Integer(),
|
||||
'N': Integer(),
|
||||
'K': Integer(),
|
||||
'StrideA': Integer(),
|
||||
'StrideB': Integer(),
|
||||
'StrideC': Integer()
|
||||
}
|
||||
df.to_sql("ck_gemm_test_params",connection,if_exists='replace',index=False, dtype=dtypes)
|
||||
'''
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
results=[]
|
||||
tflops_base=[]
|
||||
testlist=[]
|
||||
#parse the test parameters from the logfile
|
||||
for filename in args.files:
|
||||
branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment = get_log_params(filename)
|
||||
|
||||
print("Branch name:",branch_name)
|
||||
print("Node name:",node_id)
|
||||
print("GPU_arch:",gpu_arch)
|
||||
print("Compute units:",compute_units)
|
||||
print("ROCM_version:",rocm_vers)
|
||||
print("HIP_version:",hip_vers)
|
||||
print("Environment:",environment)
|
||||
#parse results, get the Tflops value for "Best Perf" kernels
|
||||
results=parse_logfile(filename)
|
||||
|
||||
print("Number of tests:",len(results))
|
||||
sql_hostname = '127.0.0.1'
|
||||
sql_username = os.environ["dbuser"]
|
||||
sql_password = os.environ["dbpassword"]
|
||||
sql_main_database = 'miopen_perf'
|
||||
sql_port = 3306
|
||||
ssh_host = os.environ["dbsship"]
|
||||
ssh_user = os.environ["dbsshuser"]
|
||||
ssh_port = int(os.environ["dbsshport"])
|
||||
ssh_pass = os.environ["dbsshpassword"]
|
||||
|
||||
with SSHTunnelForwarder(
|
||||
(ssh_host, ssh_port),
|
||||
ssh_username=ssh_user,
|
||||
ssh_password=ssh_pass,
|
||||
remote_bind_address=(sql_hostname, sql_port)) as tunnel:
|
||||
|
||||
sqlEngine = sqlalchemy.create_engine('mysql+pymysql://{0}:{1}@{2}:{3}/{4}'.
|
||||
format(sql_username, sql_password, sql_hostname, tunnel.local_bind_port, sql_main_database))
|
||||
conn = sqlEngine.connect()
|
||||
|
||||
#save gemm performance tests:
|
||||
if 'perf_gemm' in filename:
|
||||
#write the ck_gemm_test_params table only needed once the test set changes
|
||||
#post_test_params(test_list,conn)
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_gemm_tflops"
|
||||
if 'batched_gemm' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_batched_gemm_tflops"
|
||||
if 'grouped_gemm' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_grouped_gemm_tflops"
|
||||
if 'fwd_conv' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_fwd_conv_tflops"
|
||||
if 'bwd_conv' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_bwd_conv_tflops"
|
||||
if 'fusion' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_fusion_tflops"
|
||||
if 'reduction' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_reduction_GBps"
|
||||
if 'resnet50_N4' in filename:
|
||||
for i in range(1,50):
|
||||
testlist.append("Layer%i"%i)
|
||||
table_name="ck_resnet50_N4_tflops"
|
||||
if 'resnet50_N256' in filename:
|
||||
for i in range(1,50):
|
||||
testlist.append("Layer%i"%i)
|
||||
table_name="ck_resnet50_N256_tflops"
|
||||
|
||||
tflops_base = get_baseline(table_name,conn)
|
||||
store_new_test_result(table_name, results, testlist, branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment, conn)
|
||||
conn.close()
|
||||
|
||||
#compare the results to the baseline if baseline exists
|
||||
regression=0
|
||||
regression=compare_test_to_baseline(tflops_base,results,testlist)
|
||||
return regression
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
36
script/profile_batched_gemm.sh
Executable file
36
script/profile_batched_gemm.sh
Executable file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
REPEAT=$7
|
||||
|
||||
######## op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 -1 -1 -1 2
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4096 4096 4096 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8192 8192 8192 2
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1056 1056 1056 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2080 2080 2080 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4128 4128 4128 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8224 8224 8224 2
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1088 1088 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256 2
|
||||
@@ -1,12 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
|
||||
# make -j ckProfiler
|
||||
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
IN_LAYOUT=$3
|
||||
@@ -16,162 +12,27 @@ VERIFY=$6
|
||||
INIT=$7
|
||||
LOG=$8
|
||||
REPEAT=$9
|
||||
|
||||
# test
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 28 28 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
|
||||
N=${10}
|
||||
|
||||
# Resnet50 (no duplicated layer)
|
||||
N=${10}
|
||||
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
|
||||
|
||||
# Resnet50 fusion
|
||||
####### op_________________ datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C_ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
|
||||
|
||||
# Resnet50
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 58 58 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 256 1 1 56 56 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 16 16 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 512 1 1 28 28 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 230 230 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
|
||||
# SSD
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 3 7 7 300 300 2 2 1 1 3 3 3 3
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 64 1 1 75 75 2 2 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 64 3 3 75 75 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 1 1 38 38 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 1 1 38 38 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 512 256 3 3 38 38 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 512 1 1 19 19 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 512 256 3 3 19 19 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 512 1 1 10 10 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 10 10 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 256 1 1 5 5 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 5 5 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 256 1 1 3 3 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 3 3 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 340 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 510 512 3 3 19 19 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 510 512 3 3 10 10 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 510 256 3 3 5 5 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 340 256 3 3 3 3 1 1 1 1 1 1 1 1
|
||||
|
||||
36
script/profile_gemm_bias_relu_add.sh
Executable file
36
script/profile_gemm_bias_relu_add.sh
Executable file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
REPEAT=$7
|
||||
|
||||
######## op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 -1 -1 -1 -1
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024 1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048 2048
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4096 4096 4096 4096
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8192 8192 8192 8192
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1056 1056 1056 1056
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2080 2080 2080 2080
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4128 4128 4128 4128
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8224 8224 8224 8224
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1088 1088 1088
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112 2112
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160 4160
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256 8256
|
||||
18
script/profile_grouped_gemm.sh
Executable file
18
script/profile_grouped_gemm.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
REPEAT=$7
|
||||
|
||||
######## op datatype layout verify init log repeat Ms______________ Ns______________ Ks_____________ StrideAs___________ StrideBs__________ StrideCs___________
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 256,512,1024,768 128,256,384,1024 128,192,256,512 1024,1025,1044,1026 1024,1024,1024,1024 1025,1024,1028,1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 512,768,2048,128 128,256,384,1024 128,192,256,512 1024,1025,2053,1026 1024,1024,1024,1024 1025,1024,2054,1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 256,512,1024,768 512,256,768,1024 128,192,256,512 1024,1045,1034,1026 1024,1024,1024,1024 1025,1063,1028,1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 512,768,4096,768 128,768,512,2048 128,192,256,512 1024,1027,4096,2050 1024,1024,1024,2048 1025,1024,4099,2049
|
||||
@@ -1,6 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
PRECISION=
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
VERIFY="-v $1"
|
||||
INIT=$2
|
||||
NREPEAT=$3
|
||||
PRECISION=$4
|
||||
##PRECISION=--half
|
||||
##PRECISION=--double
|
||||
##PRECISION=--int8
|
||||
@@ -12,14 +15,6 @@ elif [ -n $PRECISION ] && [ "$PRECISION" = "--int8" ]; then
|
||||
ACCTYPE="-C 2"
|
||||
fi
|
||||
|
||||
|
||||
driver="./bin/ckProfiler"
|
||||
|
||||
VERIFY="-v $1"
|
||||
INIT=$2
|
||||
NREPEAT=$3
|
||||
|
||||
|
||||
#### 0 - ADD, 5 - AVG, 7 - NORM2
|
||||
Operations="0 5 7"
|
||||
|
||||
@@ -32,19 +27,19 @@ fi
|
||||
for op in $Operations; do
|
||||
set -x
|
||||
####### datatype layout reduce dims op acctype verify init repeats
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 0,1,2,3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 0 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 1 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 1,2,3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 0,2,3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 0,1,3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,22960 -R 0 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,22960 -R 1 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 4,1469440 -R 0 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 4,1469440 -R 1 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 0,1,2,3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 0 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 1 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 1,2,3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 0,2,3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 0,1,3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,22960 -R 0 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,22960 -R 1 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 4,1469440 -R 0 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 4,1469440 -R 1 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
set +x
|
||||
done
|
||||
|
||||
@@ -55,29 +50,29 @@ Operations=5
|
||||
for op in $Operations; do
|
||||
set -x
|
||||
####### datatype layout reduce dims op acctype verify init repeats
|
||||
$driver reduce $PRECISION -D 256,14,14,1024 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,28,28,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,58,58,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,7,7,2048 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,14,14,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,30,30,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,56,56,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,16,16,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,28,28,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,7,7,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,56,56,64 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,230,230,3 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,14,14,1024 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,28,28,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,58,58,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,7,7,2048 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,14,14,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,30,30,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,56,56,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,16,16,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,28,28,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,7,7,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,56,56,64 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,14,14,1024 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,28,28,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,58,58,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,7,7,2048 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,14,14,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,30,30,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,56,56,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,16,16,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,28,28,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,7,7,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,56,56,64 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,230,230,3 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,14,14,1024 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,28,28,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,58,58,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,7,7,2048 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,14,14,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,30,30,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,56,56,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,16,16,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,28,28,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,7,7,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,56,56,64 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
|
||||
set +x
|
||||
done
|
||||
|
||||
|
||||
@@ -1,17 +1,14 @@
|
||||
#!/bin/bash
|
||||
|
||||
PRECISION=
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
VERIFY="-v $1"
|
||||
INIT=$2
|
||||
NREPEAT=$3
|
||||
PRECISION=$4
|
||||
##PRECISION=--half
|
||||
##PRECISION=--double
|
||||
##PRECISION=--int8
|
||||
##PRECISION=--bf16
|
||||
|
||||
driver="./bin/ckProfiler"
|
||||
|
||||
VERIFY="-v $1"
|
||||
INIT=$2
|
||||
NREPEAT=$3
|
||||
|
||||
#### 2 - MIN, 3 - MAX, 4 - AMAX
|
||||
Operations="2 4"
|
||||
|
||||
@@ -20,19 +17,19 @@ for op in $Operations; do
|
||||
for use_idx in 0 1; do
|
||||
set -x
|
||||
####### datatype layout reduce dims op use index verify init repeats
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 0,1,2,3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 0 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 1 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 1,2,3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 0,2,3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 64,4,280,82 -R 0,1,3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,22960 -R 0 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,22960 -R 1 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 4,1469440 -R 0 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 4,1469440 -R 1 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 0,1,2,3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 0 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 1 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 1,2,3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 0,2,3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 64,4,280,82 -R 0,1,3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,22960 -R 0 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,22960 -R 1 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 4,1469440 -R 0 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 4,1469440 -R 1 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
set +x
|
||||
done
|
||||
done
|
||||
@@ -44,29 +41,29 @@ for op in $Operations; do
|
||||
for use_idx in 0 1; do
|
||||
set -x
|
||||
####### datatype layout reduce dims op use index verify init repeats
|
||||
$driver reduce $PRECISION -D 256,14,14,1024 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,28,28,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,58,58,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,7,7,2048 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,14,14,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,30,30,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,56,56,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,16,16,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,28,28,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,7,7,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,56,56,64 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 256,230,230,3 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,14,14,1024 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,28,28,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,58,58,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,7,7,2048 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,14,14,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,30,30,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,56,56,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,16,16,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,28,28,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,7,7,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$driver reduce $PRECISION -D 128,56,56,64 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,14,14,1024 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,28,28,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,58,58,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,7,7,2048 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,14,14,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,30,30,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,56,56,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,16,16,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,28,28,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,7,7,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,56,56,64 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 256,230,230,3 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,14,14,1024 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,28,28,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,58,58,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,7,7,2048 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,14,14,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,30,30,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,56,56,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,16,16,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,28,28,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,7,7,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
$DRIVER reduce $PRECISION -D 128,56,56,64 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
|
||||
set +x
|
||||
done
|
||||
done
|
||||
|
||||
171
script/profile_resnet50.sh
Executable file
171
script/profile_resnet50.sh
Executable file
@@ -0,0 +1,171 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
IN_LAYOUT=$3
|
||||
WEI_LAYOUT=$4
|
||||
OUT_LAYOUT=$5
|
||||
VERIFY=$6
|
||||
INIT=$7
|
||||
LOG=$8
|
||||
REPEAT=$9
|
||||
N=${10}
|
||||
|
||||
# test
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 28 28 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
|
||||
# Resnet50 (no duplicated layer)
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
|
||||
# Resnet50 fusion
|
||||
####### op_________________ datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C_ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
|
||||
|
||||
# Resnet50
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 58 58 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 256 1 1 56 56 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 16 16 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 512 1 1 28 28 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 230 230 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
|
||||
# SSD
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 3 7 7 300 300 2 2 1 1 3 3 3 3
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 64 1 1 75 75 2 2 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 64 3 3 75 75 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 1 1 38 38 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 1 1 38 38 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 512 256 3 3 38 38 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 512 1 1 19 19 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 512 256 3 3 19 19 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 512 1 1 10 10 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 10 10 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 256 1 1 5 5 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 5 5 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 256 1 1 3 3 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 3 3 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 340 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 510 512 3 3 19 19 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 510 512 3 3 10 10 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 510 256 3 3 5 5 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 340 256 3 3 3 3 1 1 1 1 1 1 1 1
|
||||
124
script/run_full_performance_tests.sh
Executable file
124
script/run_full_performance_tests.sh
Executable file
@@ -0,0 +1,124 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
|
||||
# and make sure the following python packages are installed in your environment:
|
||||
|
||||
pip3 install --upgrade pip
|
||||
pip3 install sqlalchemy pymysql pandas sshtunnel
|
||||
|
||||
# you would also need to set up some environment variables in order to
|
||||
# post your new test results to the database and compare them to the baseline
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
#
|
||||
# run the script as "./run_full_performance_tests.sh <tag for your test environment>
|
||||
|
||||
#get the test environment type:
|
||||
export env_type=$1
|
||||
echo 'Environment type ' $env_type
|
||||
|
||||
function print_log_header(){
|
||||
rm -f $1;
|
||||
git status | grep -e 'On branch' > $1;
|
||||
echo -n 'Node name: ' >>$1; hostname >> $1;
|
||||
#get GPU_arch and number of compute units from rocminfo
|
||||
echo -n "GPU_arch: " >> $1; rocminfo | grep "Name:" | grep "gfx" >> $1;
|
||||
rocminfo | grep "Compute Unit:" >> $1;
|
||||
hipcc --version | grep -e 'HIP version' >> $1;
|
||||
echo 'Environment type: ' $2 >>$1;
|
||||
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> $1;
|
||||
}
|
||||
|
||||
#run gemm tests
|
||||
export gemm_log="perf_gemm.log"
|
||||
print_log_header $gemm_log $env_type
|
||||
./profile_gemm.sh gemm 0 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 1 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 2 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 3 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 3 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 3 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 0 1 0 5 | tee -a $gemm_log
|
||||
python3 process_perf_data.py $gemm_log
|
||||
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256.log"
|
||||
print_log_header $resnet256_log $env_type
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 256 | tee -a $resnet256_log
|
||||
python3 process_perf_data.py $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4.log"
|
||||
print_log_header $resnet4_log $env_type
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 4 | tee -a $resnet4_log
|
||||
python3 process_perf_data.py $resnet4_log
|
||||
|
||||
#run batched_gemm tests
|
||||
export batched_gemm_log="perf_batched_gemm.log"
|
||||
print_log_header $batched_gemm_log $env_type
|
||||
./profile_batched_gemm.sh batched_gemm 0 0 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 1 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 2 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 3 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 0 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 1 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 2 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 3 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 0 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 1 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 2 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 3 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 0 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 1 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 2 0 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 3 0 2 0 5 | tee -a $batched_gemm_log
|
||||
python3 process_perf_data.py $batched_gemm_log
|
||||
|
||||
#run grouped_gemm tests
|
||||
export grouped_gemm_log="perf_grouped_gemm.log"
|
||||
print_log_header $grouped_gemm_log $env_type
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 0 0 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 1 0 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 2 0 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 3 0 2 0 5 | tee -a $grouped_gemm_log
|
||||
python3 process_perf_data.py $grouped_gemm_log
|
||||
|
||||
#run fwd_conv tests
|
||||
export fwd_conv_log="perf_fwd_conv.log"
|
||||
print_log_header $fwd_conv_log $env_type
|
||||
./profile_conv.sh conv_fwd 0 1 0 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 1 1 0 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 2 1 0 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 3 1 0 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
python3 process_perf_data.py $fwd_conv_log
|
||||
|
||||
#run bwd_conv tests
|
||||
export bwd_conv_log="perf_bwd_conv.log"
|
||||
print_log_header $bwd_conv_log $env_type
|
||||
./profile_conv.sh conv2d_bwd_data 0 1 1 1 0 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 1 1 1 1 0 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 2 1 1 1 0 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 3 1 1 1 0 2 0 5 128 | tee -a $bwd_conv_log
|
||||
python3 process_perf_data.py $bwd_conv_log
|
||||
|
||||
#run fusion tests
|
||||
export fusion_log="perf_fusion.log"
|
||||
print_log_header $fusion_log $env_type
|
||||
./profile_gemm_bias_relu_add.sh gemm_bias_relu_add 1 0 0 2 0 5 | tee -a $fusion_log
|
||||
./profile_gemm_bias_relu_add.sh gemm_bias_relu_add 1 1 0 2 0 5 | tee -a $fusion_log
|
||||
./profile_gemm_bias_relu_add.sh gemm_bias_relu_add 1 2 0 2 0 5 | tee -a $fusion_log
|
||||
./profile_gemm_bias_relu_add.sh gemm_bias_relu_add 1 3 0 2 0 5 | tee -a $fusion_log
|
||||
python3 process_perf_data.py $fusion_log
|
||||
|
||||
#run reduction tests
|
||||
export reduction_log="perf_reduction.log"
|
||||
print_log_header $reduction_log $env_type
|
||||
./profile_reduce_with_index.sh 0 2 10 --half | tee -a $reduction_log
|
||||
./profile_reduce_no_index.sh 0 2 10 --half | tee -a $reduction_log
|
||||
python3 process_perf_data.py $reduction_log
|
||||
@@ -10,17 +10,27 @@ pip3 install sqlalchemy pymysql pandas sshtunnel
|
||||
# post your new test results to the database and compare them to the baseline
|
||||
# please contact Illia.Silin@amd.com for more details
|
||||
#
|
||||
# run the script as "./run_performance_tests.sh <tag for your test environment>
|
||||
|
||||
#get the test environment type:
|
||||
export env_type=$1
|
||||
echo 'Environment type ' $env_type
|
||||
|
||||
function print_log_header(){
|
||||
rm -f $1;
|
||||
git status | grep -e 'On branch' > $1;
|
||||
echo -n 'Node name: ' >>$1; hostname >> $1;
|
||||
#get GPU_arch and number of compute units from rocminfo
|
||||
echo -n "GPU_arch: " >> $1; rocminfo | grep "Name:" | grep "gfx" >> $1;
|
||||
rocminfo | grep "Compute Unit:" >> $1;
|
||||
hipcc --version | grep -e 'HIP version' >> $1;
|
||||
echo 'Environment type: ' $2 >>$1;
|
||||
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> $1;
|
||||
}
|
||||
#run gemm tests
|
||||
export gemm_log="perf_gemm.log"
|
||||
rm -f $gemm_log
|
||||
git status | grep -e 'On branch' > ${gemm_log}
|
||||
echo -n 'Node name: ' >>${gemm_log}; hostname >> ${gemm_log}
|
||||
#get GPU_arch and number of compute units from rocminfo
|
||||
echo -n "GPU_arch: " >> ${gemm_log}; rocminfo | grep "Name:" | grep "gfx" >> ${gemm_log}
|
||||
rocminfo | grep "Compute Unit:" >> ${gemm_log}
|
||||
hipcc --version | grep -e 'HIP version' >> ${gemm_log}
|
||||
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${gemm_log}
|
||||
./profile_gemm.sh gemm 0 0 0 1 0 5 | tee -a ${gemm_log}
|
||||
print_log_header $gemm_log $env_type
|
||||
./profile_gemm.sh gemm 0 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 0 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 0 0 1 0 5 | tee -a $gemm_log
|
||||
@@ -36,22 +46,14 @@ hipcc --version | grep -e 'HIP version' >> ${gemm_log}
|
||||
./profile_gemm.sh gemm 1 3 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 3 0 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 0 1 0 5 | tee -a $gemm_log
|
||||
|
||||
python3 parse_perf_data.py ${gemm_log}
|
||||
python3 process_perf_data.py $gemm_log
|
||||
|
||||
#run resnet50 test
|
||||
export resnet_log="perf_resnet50.log"
|
||||
rm -f $resnet_log
|
||||
git status | grep -e 'On branch' > ${resnet_log}
|
||||
echo -n 'Node name: '>>${resnet_log}; hostname >>${resnet_log}
|
||||
#get GPU_arch and number of compute units from rocminfo
|
||||
echo -n "GPU_arch: " >> ${resnet_log}; rocminfo | grep "Name:" | grep "gfx" >> ${resnet_log}
|
||||
rocminfo | grep "Compute Unit:" >> ${resnet_log}
|
||||
hipcc --version | grep -e 'HIP version' >> ${resnet_log}
|
||||
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${resnet_log}
|
||||
#first run tests with N=256
|
||||
./profile_conv.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 256 | tee -a ${resnet_log}
|
||||
#then run with N=4
|
||||
./profile_conv.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 4 | tee -a ${resnet_log}
|
||||
#the script will put the results from N=256 and N=4 runs into separate tables
|
||||
python3 parse_perf_data.py ${resnet_log}
|
||||
export resnet256_log="perf_resnet50_N256.log"
|
||||
print_log_header $resnet256_log $env_type
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 256 | tee -a $resnet256_log
|
||||
python3 process_perf_data.py $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4.log"
|
||||
print_log_header $resnet4_log $env_type
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 4 | tee -a $resnet4_log
|
||||
python3 process_perf_data.py $resnet4_log
|
||||
|
||||
Reference in New Issue
Block a user