diff --git a/Jenkinsfile b/Jenkinsfile index 3fccb2881b..e9ea3d1c08 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -426,8 +426,9 @@ def runCKProfiler(Map conf=[:]){ archiveArtifacts "perf_resnet50_N4.log" archiveArtifacts "perf_batched_gemm.log" archiveArtifacts "perf_grouped_gemm.log" - archiveArtifacts "perf_conv_fwd.log" - archiveArtifacts "perf_conv_bwd_data.log" + archiveArtifacts "perf_grouped_conv_fwd.log" + archiveArtifacts "perf_grouped_conv_bwd_data.log" + archiveArtifacts "perf_grouped_conv_bwd_weight.log" archiveArtifacts "perf_gemm_bilinear.log" archiveArtifacts "perf_reduction.log" archiveArtifacts "perf_splitK_gemm.log" @@ -439,8 +440,9 @@ def runCKProfiler(Map conf=[:]){ stash name: "perf_resnet50_N4.log" stash name: "perf_batched_gemm.log" stash name: "perf_grouped_gemm.log" - stash name: "perf_conv_fwd.log" - stash name: "perf_conv_bwd_data.log" + stash name: "perf_grouped_conv_fwd.log" + stash name: "perf_grouped_conv_bwd_data.log" + stash name: "perf_grouped_conv_bwd_weight.log" stash name: "perf_gemm_bilinear.log" stash name: "perf_reduction.log" stash name: "perf_splitK_gemm.log" @@ -648,8 +650,9 @@ def process_results(Map conf=[:]){ unstash "perf_resnet50_N4.log" unstash "perf_batched_gemm.log" unstash "perf_grouped_gemm.log" - unstash "perf_conv_fwd.log" - unstash "perf_conv_bwd_data.log" + unstash "perf_grouped_conv_fwd.log" + unstash "perf_grouped_conv_bwd_data.log" + unstash "perf_grouped_conv_bwd_weight.log" unstash "perf_gemm_bilinear.log" unstash "perf_reduction.log" unstash "perf_splitK_gemm.log" @@ -746,6 +749,10 @@ pipeline { name: "RUN_PERFORMANCE_TESTS", defaultValue: true, description: "Run the performance tests (default: ON)") + booleanParam( + name: "RUN_GROUPED_CONV_LARGE_CASES_TESTS", + defaultValue: false, + description: "Run the grouped conv large cases tests (default: OFF)") booleanParam( name: "RUN_CK_TILE_TESTS", defaultValue: false, @@ -837,6 +844,30 @@ pipeline { } } } + stage("Run Grouped Conv Large Case Tests") + { + 
parallel + { + stage("Run Grouped Conv Large Case Tests on gfx90a") + { + when { + beforeAgent true + expression { params.RUN_GROUPED_CONV_LARGE_CASES_TESTS.toBoolean() } + } + agent{ label rocmnode("gfx90a")} + environment{ + setup_args = "NO_CK_BUILD" + execute_args = """ ../script/cmake-ck-dev.sh ../ gfx90a && \ + make -j64 test_grouped_convnd_fwd_large_cases_xdl && \ + ./bin/test_grouped_convnd_fwd_large_cases_xdl""" + } + steps{ + buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args) + cleanWs() + } + } + } + } stage("Run CK_TILE Tests") { parallel diff --git a/script/process_perf_data.py b/script/process_perf_data.py index c6cb6e05c7..b82a7c2891 100644 --- a/script/process_perf_data.py +++ b/script/process_perf_data.py @@ -122,7 +122,7 @@ def parse_logfile(logfile): #sorted_kernels = [x for _,x in sorted(zip(tests,kernels))] test_list=list(range(1,len(tests)+1)) #parse conv_fwd and conv_bwd performance tests: - elif 'conv_fwd' in logfile or 'conv_bwd_data' in logfile: + elif 'conv_fwd' in logfile or 'conv_bwd' in logfile: for line in open(logfile): if 'tflops:' in line: lst=line.split() @@ -274,14 +274,26 @@ def main(): for i in range(1,len(results)+1): testlist.append("Test%i"%i) table_name="ck_grouped_gemm_tflops" - if 'conv_fwd' in filename: + if 'perf_conv_fwd' in filename: for i in range(1,len(results)+1): testlist.append("Test%i"%i) table_name="ck_conv_fwd_tflops" - if 'conv_bwd_data' in filename: + if 'perf_conv_bwd_data' in filename: for i in range(1,len(results)+1): testlist.append("Test%i"%i) table_name="ck_conv_bwd_data_tflops" + if 'grouped_conv_fwd' in filename: + for i in range(1,len(results)+1): + testlist.append("Test%i"%i) + table_name="ck_grouped_conv_fwd_tflops" + if 'grouped_conv_bwd_data' in filename: + for i in range(1,len(results)+1): + testlist.append("Test%i"%i) + table_name="ck_grouped_conv_bwd_data_tflops" + if 'grouped_conv_bwd_weight' in filename: + for i in 
range(1,len(results)+1): + testlist.append("Test%i"%i) + table_name="ck_grouped_conv_bwd_weight_tflops" if 'gemm_bilinear' in filename: for i in range(1,len(results)+1): testlist.append("Test%i"%i) diff --git a/script/process_qa_data.sh b/script/process_qa_data.sh index bf16f05cd0..d6083d2fc7 100755 --- a/script/process_qa_data.sh +++ b/script/process_qa_data.sh @@ -15,8 +15,9 @@ python3 process_perf_data.py perf_resnet50_N256.log python3 process_perf_data.py perf_resnet50_N4.log python3 process_perf_data.py perf_batched_gemm.log python3 process_perf_data.py perf_grouped_gemm.log -python3 process_perf_data.py perf_conv_fwd.log -python3 process_perf_data.py perf_conv_bwd_data.log +python3 process_perf_data.py perf_grouped_conv_fwd.log +python3 process_perf_data.py perf_grouped_conv_bwd_data.log +python3 process_perf_data.py perf_grouped_conv_bwd_weight.log python3 process_perf_data.py perf_gemm_bilinear.log python3 process_perf_data.py perf_reduction.log python3 process_perf_data.py perf_splitK_gemm.log diff --git a/script/profile_conv_bwd_data.sh b/script/profile_grouped_conv_bwd_data.sh similarity index 100% rename from script/profile_conv_bwd_data.sh rename to script/profile_grouped_conv_bwd_data.sh diff --git a/script/profile_conv_fwd.sh b/script/profile_grouped_conv_bwd_weight.sh similarity index 90% rename from script/profile_conv_fwd.sh rename to script/profile_grouped_conv_bwd_weight.sh index a1d2f450c9..e3652202d4 100755 --- a/script/profile_conv_fwd.sh +++ b/script/profile_grouped_conv_bwd_weight.sh @@ -12,27 +12,28 @@ INIT=$5 LOG=$6 TIME=$7 - N=$8 +N=$8 +SplitK=$9 # Resnet50 ######## op datatype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 
$N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 - $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3 + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 
1 1 1 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $SplitK + $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3 $SplitK diff --git a/script/profile_grouped_conv_fwd.sh b/script/profile_grouped_conv_fwd.sh new file mode 100755 index 0000000000..9a974525ad --- /dev/null +++ b/script/profile_grouped_conv_fwd.sh @@ 
-0,0 +1,39 @@ +#!/bin/bash + +## GPU visibility +export HIP_VISIBLE_DEVICES=0 +DRIVER="../build/bin/ckProfiler" + +OP=$1 +DATATYPE=$2 +LAYOUT=$3 +INDEXTYPE=$4 +VERIFY=$5 +INIT=$6 +LOG=$7 +TIME=$8 + + N=$9 + +# Resnet50 +######## op datatype indextype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 + 
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 + $DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3 diff --git a/script/run_full_performance_tests.sh b/script/run_full_performance_tests.sh index 01ac1b0a39..e167ce012b 100755 --- a/script/run_full_performance_tests.sh +++ b/script/run_full_performance_tests.sh @@ -90,21 +90,27 @@ print_log_header $gemm_bilinear_log $env_type $branch $host_name ./profile_gemm_bilinear.sh gemm_bilinear 1 2 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log ./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log -#run conv_fwd tests -export conv_fwd_log="perf_conv_fwd.log" -print_log_header $conv_fwd_log $env_type $branch $host_name -./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log -./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log -./profile_conv_fwd.sh conv_fwd 2 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log -./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log +#run grouped_fwd tests +export grouped_conv_fwd_log="perf_grouped_conv_fwd.log" +print_log_header $grouped_conv_fwd_log $env_type $branch $host_name +./profile_grouped_conv_fwd.sh grouped_conv_fwd 0 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log +./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log +./profile_grouped_conv_fwd.sh grouped_conv_fwd 2 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log -#run conv_bwd_data tests -export 
conv_bwd_data_log="perf_conv_bwd_data.log" -print_log_header $conv_bwd_data_log $env_type $branch $host_name -./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log -./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log -./profile_conv_bwd_data.sh conv_bwd_data 2 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log -./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log +#run grouped_bwd_data tests +export grouped_conv_bwd_data_log="perf_grouped_conv_bwd_data.log" +print_log_header $grouped_conv_bwd_data_log $env_type $branch $host_name +./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log +./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log +./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 2 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log + +#run grouped_bwd_weight tests +export grouped_conv_bwd_weight_log="perf_grouped_conv_bwd_weight.log" +print_log_header $grouped_conv_bwd_weight_log $env_type $branch $host_name +./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 0 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log +./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log +./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 2 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log +./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 2 $verify 1 0 1 256 4 2>&1 | tee -a $grouped_conv_bwd_weight_log #run resnet50 tests export resnet256_log="perf_resnet50_N256.log" diff --git a/script/run_performance_tests.sh b/script/run_performance_tests.sh index 4e3a6fc8eb..317d270983 100755 --- a/script/run_performance_tests.sh +++ b/script/run_performance_tests.sh @@ -51,6 +51,21 
@@ print_log_header $gemm_log $env_type $branch $host_name ./profile_gemm.sh gemm 2 3 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log +#run grouped_fwd fp16 tests +export grouped_conv_fwd_log="perf_grouped_conv_fwd_fp16.log" +print_log_header $grouped_conv_fwd_log $env_type $branch $host_name +./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log + +#run grouped_bwd_data fp16 tests +export grouped_conv_bwd_data_log="perf_grouped_conv_bwd_data_fp16.log" +print_log_header $grouped_conv_bwd_data_log $env_type $branch $host_name +./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log + +#run grouped_bwd_weight fp16 tests +export grouped_conv_bwd_weight_log="perf_grouped_conv_bwd_weight_fp16.log" +print_log_header $grouped_conv_bwd_weight_log $env_type $branch $host_name +./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 1 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log + #run resnet50 tests export resnet256_log="perf_resnet50_N256.log" print_log_header $resnet256_log $env_type $branch $host_name diff --git a/test/grouped_convnd_fwd/CMakeLists.txt b/test/grouped_convnd_fwd/CMakeLists.txt index f611e66243..4ceb4a2d99 100644 --- a/test/grouped_convnd_fwd/CMakeLists.txt +++ b/test/grouped_convnd_fwd/CMakeLists.txt @@ -7,6 +7,12 @@ if(GPU_TARGETS MATCHES "gfx9" OR GPU_TARGETS MATCHES "gfx11") endif() endif() +if(GPU_TARGETS MATCHES "gfx9") + add_executable(test_grouped_convnd_fwd_large_cases_xdl test_grouped_convnd_fwd_large_cases_xdl.cpp) + target_compile_options(test_grouped_convnd_fwd_large_cases_xdl PRIVATE -Wno-global-constructors -Wno-undef) + target_link_libraries(test_grouped_convnd_fwd_large_cases_xdl PRIVATE gtest_main getopt::getopt utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance) +endif() + 
add_gtest_executable(test_grouped_convnd_fwd_multi_ab_interface test_grouped_convnd_fwd_multi_ab_interface.cpp) if(result EQUAL 0) target_link_libraries(test_grouped_convnd_fwd_multi_ab_interface PRIVATE utility) diff --git a/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp index c86b18e77e..b960676574 100644 --- a/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp +++ b/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp @@ -17,7 +17,7 @@ class TestGroupedConvndFwd : public ::testing::Test using InLayout = std::tuple_element_t<1, Tuple>; using WeiLayout = std::tuple_element_t<2, Tuple>; using OutLayout = std::tuple_element_t<3, Tuple>; - using IndexType = std::tuple_element_t<4, Tuple>; + using IndexType = ck::index_t; std::vector conv_params; @@ -50,31 +50,28 @@ class TestGroupedConvndFwd : public ::testing::Test using namespace ck::tensor_layout::convolution; -using KernelTypes1d = ::testing::Types, - std::tuple, - std::tuple, - std::tuple>; +using KernelTypes1d = ::testing::Types, + std::tuple, + std::tuple, + std::tuple>; -using KernelTypes2d = ::testing::Types, - std::tuple, - std::tuple, - std::tuple, - std::tuple, - std::tuple, - std::tuple, - std::tuple>; +using KernelTypes2d = ::testing::Types, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple>; -using KernelTypes3d = ::testing::Types, - std::tuple, - std::tuple, - std::tuple, - std::tuple, - std::tuple, - std::tuple, - std::tuple>; - -using KernelTypes2dLargeCases = - ::testing::Types>; +using KernelTypes3d = ::testing::Types, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple>; template class TestGroupedConvndFwd1d : public TestGroupedConvndFwd @@ -91,15 +88,9 @@ class TestGroupedConvndFwd3d : public TestGroupedConvndFwd { }; -template -class TestGroupedConvndFwd2dLargeCases : public TestGroupedConvndFwd -{ -}; - TYPED_TEST_SUITE(TestGroupedConvndFwd1d, 
KernelTypes1d); TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d); TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d); -TYPED_TEST_SUITE(TestGroupedConvndFwd2dLargeCases, KernelTypes2dLargeCases); TYPED_TEST(TestGroupedConvndFwd1d, Test1D) { @@ -149,17 +140,3 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D) {3, 96, 1, 1, 1, {3, 3, 3}, {4, 30, 160}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}); this->template Run<3>(); } - -TYPED_TEST(TestGroupedConvndFwd2dLargeCases, Test2DLargeCases) -{ - // Case larger than 2GB - this->conv_params.push_back( - {2, 1, 64, 4, 192, {2, 2}, {224, 224}, {224, 224}, {1, 1}, {0, 0}, {0, 0}}); - // With supported NumGroupsToMerge > 1 - this->conv_params.push_back( - {2, 32, 64, 1, 1, {2, 2}, {672, 672}, {672, 672}, {1, 1}, {0, 0}, {0, 0}}); - // When image is larger than 2GB - this->conv_params.push_back( - {2, 1, 1, 256, 256, {3, 3}, {4096, 2048}, {1024, 1024}, {3, 3}, {1, 1}, {1, 1}}); - this->template Run<2>(); -} diff --git a/test/grouped_convnd_fwd/test_grouped_convnd_fwd_large_cases_xdl.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd_large_cases_xdl.cpp new file mode 100644 index 0000000000..3d734fa5e5 --- /dev/null +++ b/test/grouped_convnd_fwd/test_grouped_convnd_fwd_large_cases_xdl.cpp @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved. 
+ +#include +#include +#include +#include +#include + +#include "profiler/profile_grouped_conv_fwd_impl.hpp" + +template +class TestGroupedConvndFwd : public ::testing::Test +{ + protected: + using DataType = std::tuple_element_t<0, Tuple>; + using InLayout = std::tuple_element_t<1, Tuple>; + using WeiLayout = std::tuple_element_t<2, Tuple>; + using OutLayout = std::tuple_element_t<3, Tuple>; + using IndexType = ck::long_index_t; + + std::vector conv_params; + + template + void Run() + { + EXPECT_FALSE(conv_params.empty()); + bool pass = true; + for(auto& param : conv_params) + { + pass = pass && ck::profiler::profile_grouped_conv_fwd_impl( + true, // do_verification + 1, // init_method: integer value + false, // do_log + false, // time_kernel + param); + } + EXPECT_TRUE(pass); + } +}; + +using namespace ck::tensor_layout::convolution; + +using KernelTypes2d = ::testing::Types, + std::tuple, + std::tuple>; + +using KernelTypes3d = ::testing::Types, + std::tuple, + std::tuple>; + +template +class TestGroupedConvndFwd2d : public TestGroupedConvndFwd +{ +}; + +template +class TestGroupedConvndFwd3d : public TestGroupedConvndFwd +{ +}; + +TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d); +TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d); + +TYPED_TEST(TestGroupedConvndFwd2d, Test2D) +{ + // Case larger than 2GB + this->conv_params.push_back( + {2, 1, 128, 4, 192, {2, 2}, {224, 224}, {224, 224}, {1, 1}, {0, 0}, {0, 0}}); + // With supported NumGroupsToMerge > 1 + this->conv_params.push_back( + {2, 32, 64, 1, 1, {2, 2}, {672, 672}, {672, 672}, {1, 1}, {0, 0}, {0, 0}}); + // When image is larger than 2GB + this->conv_params.push_back( + {2, 2, 2, 128, 128, {3, 3}, {4096, 2048}, {300, 300}, {3, 3}, {1, 1}, {1, 1}}); + this->template Run<2>(); +} + +TYPED_TEST(TestGroupedConvndFwd3d, Test3D) +{ + // Case larger than 2GB + this->conv_params.push_back({3, + 1, + 128, + 4, + 192, + {2, 2, 2}, + {2, 224, 224}, + {1, 224, 224}, + {1, 1, 1}, + {0, 0, 0}, + {0, 0, 
0}}); + // With supported NumGroupsToMerge > 1 + this->conv_params.push_back({3, + 32, + 64, + 1, + 1, + {2, 2, 2}, + {360, 2, 672}, + {360, 2, 672}, + {1, 1, 1}, + {0, 0, 0}, + {0, 0, 0}}); + // When image is larger than 2GB + this->conv_params.push_back({3, + 1, + 2, + 128, + 128, + {3, 1, 3}, + {900, 2, 2048}, + {300, 1, 300}, + {3, 2, 3}, + {1, 1, 1}, + {1, 1, 1}}); + this->template Run<3>(); +}