mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-16 10:59:55 +00:00
Add performance and large tensor tests for grouped conv (#1456)
* Add performance and large tensor tests for grouped conv
* Resize tests
* Resize tests
* Update the Python script to parse the grouped_conv results
* Remove int8 tests
* Change bwd weight layout
---------
Co-authored-by: illsilin <Illia.Silin@amd.com>
[ROCm/composable_kernel commit: 2581727d2a]
This commit is contained in:
@@ -122,7 +122,7 @@ def parse_logfile(logfile):
|
||||
#sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
|
||||
test_list=list(range(1,len(tests)+1))
|
||||
#parse conv_fwd and conv_bwd performance tests:
|
||||
elif 'conv_fwd' in logfile or 'conv_bwd_data' in logfile:
|
||||
elif 'conv_fwd' in logfile or 'conv_bwd' in logfile:
|
||||
for line in open(logfile):
|
||||
if 'tflops:' in line:
|
||||
lst=line.split()
|
||||
@@ -274,14 +274,26 @@ def main():
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_grouped_gemm_tflops"
|
||||
if 'conv_fwd' in filename:
|
||||
if 'perf_conv_fwd' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_conv_fwd_tflops"
|
||||
if 'conv_bwd_data' in filename:
|
||||
if 'perf_conv_bwd_data' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_conv_bwd_data_tflops"
|
||||
if 'grouped_conv_fwd' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_grouped_conv_fwd_tflops"
|
||||
if 'grouped_conv_bwd_data' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_grouped_conv_bwd_data_tflops"
|
||||
if 'grouped_conv_bwd_weight' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_grouped_conv_bwd_weight_tflops"
|
||||
if 'gemm_bilinear' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
|
||||
@@ -15,8 +15,9 @@ python3 process_perf_data.py perf_resnet50_N256.log
|
||||
python3 process_perf_data.py perf_resnet50_N4.log
|
||||
python3 process_perf_data.py perf_batched_gemm.log
|
||||
python3 process_perf_data.py perf_grouped_gemm.log
|
||||
python3 process_perf_data.py perf_conv_fwd.log
|
||||
python3 process_perf_data.py perf_conv_bwd_data.log
|
||||
python3 process_perf_data.py perf_grouped_conv_fwd.log
|
||||
python3 process_perf_data.py perf_grouped_conv_bwd_data.log
|
||||
python3 process_perf_data.py perf_grouped_conv_bwd_weight.log
|
||||
python3 process_perf_data.py perf_gemm_bilinear.log
|
||||
python3 process_perf_data.py perf_reduction.log
|
||||
python3 process_perf_data.py perf_splitK_gemm.log
|
||||
|
||||
@@ -12,27 +12,28 @@ INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
N=$8
|
||||
N=$8
|
||||
SplitK=$9
|
||||
|
||||
# Resnet50
|
||||
######## op datatype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $SplitK
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3 $SplitK
|
||||
39
script/profile_grouped_conv_fwd.sh
Executable file
39
script/profile_grouped_conv_fwd.sh
Executable file
@@ -0,0 +1,39 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
INDEXTYPE=$4
|
||||
VERIFY=$5
|
||||
INIT=$6
|
||||
LOG=$7
|
||||
TIME=$8
|
||||
|
||||
N=$9
|
||||
|
||||
# Resnet50
|
||||
######## op datatype indextype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
@@ -90,21 +90,27 @@ print_log_header $gemm_bilinear_log $env_type $branch $host_name
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 2 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
|
||||
|
||||
#run conv_fwd tests
|
||||
export conv_fwd_log="perf_conv_fwd.log"
|
||||
print_log_header $conv_fwd_log $env_type $branch $host_name
|
||||
./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv_fwd.sh conv_fwd 2 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
#run grouped_fwd tests
|
||||
export grouped_conv_fwd_log="perf_grouped_conv_fwd.log"
|
||||
print_log_header $grouped_conv_fwd_log $env_type $branch $host_name
|
||||
./profile_grouped_conv_fwd.sh grouped_conv_fwd 0 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
|
||||
./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
|
||||
./profile_grouped_conv_fwd.sh grouped_conv_fwd 2 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
|
||||
|
||||
#run conv_bwd_data tests
|
||||
export conv_bwd_data_log="perf_conv_bwd_data.log"
|
||||
print_log_header $conv_bwd_data_log $env_type $branch $host_name
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 2 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
#run grouped_bwd_data tests
|
||||
export grouped_conv_bwd_data_log="perf_grouped_conv_bwd_data.log"
|
||||
print_log_header $grouped_conv_bwd_data_log $env_type $branch $host_name
|
||||
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
|
||||
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
|
||||
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 2 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
|
||||
|
||||
#run grouped_bwd_weight tests
|
||||
export grouped_conv_bwd_weight_log="perf_grouped_conv_bwd_weight.log"
|
||||
print_log_header $grouped_conv_bwd_weight_log $env_type $branch $host_name
|
||||
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 0 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
|
||||
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
|
||||
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 2 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
|
||||
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 2 $verify 1 0 1 256 4 2>&1 | tee -a $grouped_conv_bwd_weight_log
|
||||
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256.log"
|
||||
|
||||
@@ -51,6 +51,21 @@ print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 2 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
|
||||
#run grouped_fwd fp16 tests
|
||||
export grouped_conv_fwd_log="perf_grouped_conv_fwd_fp16.log"
|
||||
# Write the header into the log exported on the preceding line, i.e. the same file the grouped_conv_fwd run below is tee'd to.
print_log_header $grouped_conv_fwd_log $env_type $branch $host_name
|
||||
./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
|
||||
|
||||
#run grouped_bwd_data fp16 tests
|
||||
export grouped_conv_bwd_data_log="perf_grouped_conv_bwd_data_fp16.log"
|
||||
print_log_header $grouped_conv_bwd_data_log $env_type $branch $host_name
|
||||
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
|
||||
|
||||
#run grouped_bwd_weight fp16 tests
|
||||
export grouped_conv_bwd_weight_log="perf_grouped_conv_bwd_weight_fp16.log"
|
||||
print_log_header $grouped_conv_bwd_weight_log $env_type $branch $host_name
|
||||
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 1 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
|
||||
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256.log"
|
||||
print_log_header $resnet256_log $env_type $branch $host_name
|
||||
|
||||
Reference in New Issue
Block a user