From 02db748e747b9788dcb53e7c4a2f968d5b60149f Mon Sep 17 00:00:00 2001 From: Po Yen Chen Date: Fri, 2 Dec 2022 05:15:02 +0800 Subject: [PATCH] Modularize ckProfiler operations (#514) * Re-structure ckProfiler source files * Rename profiler.cpp to main.cpp * Modularize ckProfiler operations * Add description for profiler operations * Use longer name to avoid name collision * Use macro to delay expansion * Use std::move() to avoid object copying * Prohibit users from calling dtor * Use macro to eliminate redundant code * Make friend function hidden * Add missing include directive * Fix wrong include directives * Remove int8 from batchnorm-forward instances since it is not needed for forward training and could fail test Co-authored-by: Qianfeng Zhang [ROCm/composable_kernel commit: 8784a72e23538d594ea6b1bd527478fba2962d30] --- profiler/CMakeLists.txt | 63 +------- .../include/{ => profiler}/data_type_enum.hpp | 0 .../{ => profiler}/data_type_enum_helper.hpp | 2 +- ...le_batched_gemm_add_relu_gemm_add_impl.hpp | 0 .../profile_batched_gemm_gemm_impl.hpp | 0 .../profile_batched_gemm_impl.hpp | 0 .../profile_batched_gemm_reduce_impl.hpp | 0 ...profile_batched_gemm_softmax_gemm_impl.hpp | 0 ...batched_gemm_softmax_gemm_permute_impl.hpp | 0 .../profile_batchnorm_backward_impl.hpp | 0 .../profile_batchnorm_forward_impl.hpp | 0 .../profile_conv_bwd_data_impl.hpp | 0 .../profile_conv_fwd_bias_relu_add_impl.hpp | 0 .../profile_conv_fwd_bias_relu_impl.hpp | 0 .../{ => profiler}/profile_conv_fwd_impl.hpp | 0 .../profile_convnd_bwd_data_impl.hpp | 0 .../profile_convnd_bwd_weight_impl.hpp | 0 .../profile_elementwise_layernorm_impl.hpp | 0 .../profile_gemm_add_add_fastgelu_impl.hpp | 0 .../profile_gemm_bias_add_reduce_impl.hpp | 0 .../profile_gemm_bilinear_impl.hpp | 0 .../{ => profiler}/profile_gemm_impl.hpp | 0 .../profile_gemm_reduce_impl.hpp | 0 .../profile_gemm_splitk_impl.hpp | 0 .../profile_grouped_conv_bwd_weight_impl.hpp | 0 .../profile_grouped_conv_fwd_impl.hpp | 0 .../profile_grouped_gemm_impl.hpp | 0 .../{ => profiler}/profile_groupnorm_impl.hpp | 0 .../{ => profiler}/profile_layernorm_impl.hpp | 0 .../{ => profiler}/profile_reduce_impl.hpp | 0 .../{ => profiler}/profile_softmax_impl.hpp | 0 profiler/src/CMakeLists.txt | 63 ++++++++ profiler/src/profile_batched_gemm.cpp | 10 +- ...profile_batched_gemm_add_relu_gemm_add.cpp | 11 +- profiler/src/profile_batched_gemm_gemm.cpp | 10 +- profiler/src/profile_batched_gemm_reduce.cpp | 10 +- profiler/src/profile_batchnorm_bwd.cpp | 5 +- profiler/src/profile_batchnorm_fwd.cpp | 5 +- profiler/src/profile_conv_bwd_data.cpp | 10 +- profiler/src/profile_conv_fwd.cpp | 10 +- profiler/src/profile_conv_fwd_bias_relu.cpp | 10 +- .../src/profile_conv_fwd_bias_relu_add.cpp | 11 +- profiler/src/profile_gemm.cpp | 10 +- .../src/profile_gemm_add_add_fastgelu.cpp | 10 +- profiler/src/profile_gemm_bias_add_reduce.cpp | 10 +- profiler/src/profile_gemm_bilinear.cpp | 10 +- profiler/src/profile_gemm_reduce.cpp | 10 +- profiler/src/profile_gemm_splitk.cpp | 10 +- .../src/profile_grouped_conv_bwd_weight.cpp | 10 +- profiler/src/profile_grouped_conv_fwd.cpp | 10 +- profiler/src/profile_grouped_gemm.cpp | 10 +- profiler/src/profile_groupnorm.cpp | 12 +- profiler/src/profile_layernorm.cpp | 7 +- profiler/src/profile_reduce.cpp | 7 +- profiler/src/profile_softmax.cpp | 5 +- profiler/src/profiler.cpp | 150 ++---------------- profiler/src/profiler_operation_registry.hpp | 79 +++++++++ test/CMakeLists.txt | 1 + test/batched_gemm/batched_gemm_bf16.cpp | 2 +- test/batched_gemm/batched_gemm_fp16.cpp | 2 +- test/batched_gemm/batched_gemm_fp32.cpp | 2 +- test/batched_gemm/batched_gemm_int8.cpp | 2 +- .../test_batched_gemm_gemm_util.hpp | 2 +- .../batched_gemm_reduce_fp16.cpp | 2 +- .../test_batched_gemm_softmax_gemm_util.hpp | 2 +- ...batched_gemm_softmax_gemm_permute_util.hpp | 2 +- test/batchnorm/batchnorm_bwd_rank_4.cpp | 2 +- test/batchnorm/batchnorm_fwd_rank_4.cpp | 2 +- test/convnd_bwd_data/convnd_bwd_data.cpp | 2 +- test/convnd_fwd/convnd_fwd.cpp | 2 +- .../test_elementwise_layernorm_fp16.cpp | 2 +- test/gemm_reduce/gemm_reduce_fp16.cpp | 2 +- .../grouped_convnd_bwd_weight.cpp | 2 +- .../grouped_convnd_fwd/grouped_convnd_fwd.cpp | 2 +- test/grouped_gemm/grouped_gemm_fp16.cpp | 2 +- test/normalization/test_groupnorm_fp16.cpp | 2 +- test/normalization/test_groupnorm_fp32.cpp | 2 +- test/normalization/test_layernorm2d_fp16.cpp | 2 +- test/normalization/test_layernorm2d_fp32.cpp | 2 +- test/reduce/reduce_no_index.cpp | 2 +- test/reduce/reduce_with_index.cpp | 2 +- test/softmax/test_softmax_util.hpp | 2 +- 82 files changed, 346 insertions(+), 273 deletions(-) rename profiler/include/{ => profiler}/data_type_enum.hpp (100%) rename profiler/include/{ => profiler}/data_type_enum_helper.hpp (96%) rename profiler/include/{ => profiler}/profile_batched_gemm_add_relu_gemm_add_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_batched_gemm_gemm_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_batched_gemm_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_batched_gemm_reduce_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_batched_gemm_softmax_gemm_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_batched_gemm_softmax_gemm_permute_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_batchnorm_backward_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_batchnorm_forward_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_conv_bwd_data_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_conv_fwd_bias_relu_add_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_conv_fwd_bias_relu_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_conv_fwd_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_convnd_bwd_data_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_convnd_bwd_weight_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_elementwise_layernorm_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_gemm_add_add_fastgelu_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_gemm_bias_add_reduce_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_gemm_bilinear_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_gemm_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_gemm_reduce_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_gemm_splitk_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_grouped_conv_bwd_weight_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_grouped_conv_fwd_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_grouped_gemm_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_groupnorm_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_layernorm_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_reduce_impl.hpp (100%) rename profiler/include/{ => profiler}/profile_softmax_impl.hpp (100%) create mode 100644 profiler/src/CMakeLists.txt create mode 100644 profiler/src/profiler_operation_registry.hpp diff --git a/profiler/CMakeLists.txt b/profiler/CMakeLists.txt index 0dccfff476..bdd7125ac1 100644 --- a/profiler/CMakeLists.txt +++ b/profiler/CMakeLists.txt @@ -1,64 +1,5 @@ include_directories(BEFORE - ${PROJECT_SOURCE_DIR}/ + ${CMAKE_CURRENT_LIST_DIR}/include ) -# ck_profiler -set(PROFILER_SOURCE - src/profiler.cpp - src/profile_gemm.cpp - src/profile_gemm_splitk.cpp - src/profile_gemm_bilinear.cpp - src/profile_gemm_bias_add_reduce.cpp - src/profile_gemm_add_add_fastgelu.cpp - src/profile_gemm_reduce.cpp - src/profile_batched_gemm.cpp - src/profile_batched_gemm_gemm.cpp - src/profile_batched_gemm_add_relu_gemm_add.cpp - src/profile_batched_gemm_reduce.cpp - src/profile_grouped_gemm.cpp - src/profile_conv_fwd.cpp - src/profile_conv_fwd_bias_relu.cpp - src/profile_conv_fwd_bias_relu_add.cpp - src/profile_conv_bwd_data.cpp - src/profile_grouped_conv_fwd.cpp - src/profile_grouped_conv_bwd_weight.cpp - src/profile_reduce.cpp - src/profile_groupnorm.cpp - src/profile_layernorm.cpp - src/profile_softmax.cpp - src/profile_batchnorm_fwd.cpp - src/profile_batchnorm_bwd.cpp -) - -add_executable(ckProfiler ${PROFILER_SOURCE}) - -target_link_libraries(ckProfiler PRIVATE utility) -target_link_libraries(ckProfiler PRIVATE device_gemm_instance) -target_link_libraries(ckProfiler PRIVATE device_gemm_splitk_instance) -target_link_libraries(ckProfiler PRIVATE device_gemm_bilinear_instance) -target_link_libraries(ckProfiler PRIVATE device_gemm_add_add_fastgelu_instance) -target_link_libraries(ckProfiler PRIVATE device_gemm_reduce_instance) -target_link_libraries(ckProfiler PRIVATE device_gemm_bias_add_reduce_instance) -target_link_libraries(ckProfiler PRIVATE device_batched_gemm_instance) -target_link_libraries(ckProfiler PRIVATE device_batched_gemm_gemm_instance) -target_link_libraries(ckProfiler PRIVATE device_batched_gemm_add_relu_gemm_add_instance) -target_link_libraries(ckProfiler PRIVATE device_batched_gemm_reduce_instance) -target_link_libraries(ckProfiler PRIVATE device_grouped_gemm_instance) -target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_instance) -target_link_libraries(ckProfiler PRIVATE device_grouped_conv1d_fwd_instance) -target_link_libraries(ckProfiler PRIVATE device_grouped_conv2d_fwd_instance) -target_link_libraries(ckProfiler PRIVATE device_grouped_conv3d_fwd_instance) -target_link_libraries(ckProfiler PRIVATE device_conv1d_bwd_data_instance) -target_link_libraries(ckProfiler PRIVATE device_conv2d_bwd_data_instance) -target_link_libraries(ckProfiler PRIVATE device_conv3d_bwd_data_instance) -target_link_libraries(ckProfiler PRIVATE device_grouped_conv1d_bwd_weight_instance) -target_link_libraries(ckProfiler PRIVATE device_grouped_conv2d_bwd_weight_instance) -target_link_libraries(ckProfiler PRIVATE device_grouped_conv3d_bwd_weight_instance) -target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance) -target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance) -target_link_libraries(ckProfiler PRIVATE device_normalization_instance) -target_link_libraries(ckProfiler PRIVATE device_softmax_instance) -target_link_libraries(ckProfiler PRIVATE device_reduce_instance) -target_link_libraries(ckProfiler PRIVATE device_batchnorm_instance) - -rocm_install(TARGETS ckProfiler COMPONENT profiler) +add_subdirectory(src) diff --git a/profiler/include/data_type_enum.hpp b/profiler/include/profiler/data_type_enum.hpp similarity index 100% rename from profiler/include/data_type_enum.hpp rename to profiler/include/profiler/data_type_enum.hpp diff --git a/profiler/include/data_type_enum_helper.hpp b/profiler/include/profiler/data_type_enum_helper.hpp similarity index 96% rename from profiler/include/data_type_enum_helper.hpp rename to profiler/include/profiler/data_type_enum_helper.hpp index 6f8ef2b9f7..d9bd5e1a40 100644 --- a/profiler/include/data_type_enum_helper.hpp +++ b/profiler/include/profiler/data_type_enum_helper.hpp @@ -4,7 +4,7 @@ #pragma #include "ck/utility/data_type.hpp" -#include "profiler/include/data_type_enum.hpp" +#include "profiler/data_type_enum.hpp" namespace ck { diff --git a/profiler/include/profile_batched_gemm_add_relu_gemm_add_impl.hpp b/profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp similarity index 100% rename from profiler/include/profile_batched_gemm_add_relu_gemm_add_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp diff --git a/profiler/include/profile_batched_gemm_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp similarity index 100% rename from profiler/include/profile_batched_gemm_gemm_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp diff --git a/profiler/include/profile_batched_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_impl.hpp similarity index 100% rename from profiler/include/profile_batched_gemm_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_impl.hpp diff --git a/profiler/include/profile_batched_gemm_reduce_impl.hpp b/profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp similarity index 100% rename from profiler/include/profile_batched_gemm_reduce_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp diff --git a/profiler/include/profile_batched_gemm_softmax_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp similarity index 100% rename from profiler/include/profile_batched_gemm_softmax_gemm_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp diff --git a/profiler/include/profile_batched_gemm_softmax_gemm_permute_impl.hpp b/profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp similarity index 100% rename from profiler/include/profile_batched_gemm_softmax_gemm_permute_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp diff --git a/profiler/include/profile_batchnorm_backward_impl.hpp b/profiler/include/profiler/profile_batchnorm_backward_impl.hpp similarity index 100% rename from profiler/include/profile_batchnorm_backward_impl.hpp rename to profiler/include/profiler/profile_batchnorm_backward_impl.hpp diff --git a/profiler/include/profile_batchnorm_forward_impl.hpp b/profiler/include/profiler/profile_batchnorm_forward_impl.hpp similarity index 100% rename from profiler/include/profile_batchnorm_forward_impl.hpp rename to profiler/include/profiler/profile_batchnorm_forward_impl.hpp diff --git a/profiler/include/profile_conv_bwd_data_impl.hpp b/profiler/include/profiler/profile_conv_bwd_data_impl.hpp similarity index 100% rename from profiler/include/profile_conv_bwd_data_impl.hpp rename to profiler/include/profiler/profile_conv_bwd_data_impl.hpp diff --git a/profiler/include/profile_conv_fwd_bias_relu_add_impl.hpp b/profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp similarity index 100% rename from profiler/include/profile_conv_fwd_bias_relu_add_impl.hpp rename to profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp diff --git a/profiler/include/profile_conv_fwd_bias_relu_impl.hpp b/profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp similarity index 100% rename from profiler/include/profile_conv_fwd_bias_relu_impl.hpp rename to profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp diff --git a/profiler/include/profile_conv_fwd_impl.hpp b/profiler/include/profiler/profile_conv_fwd_impl.hpp similarity index 100% rename from profiler/include/profile_conv_fwd_impl.hpp rename to profiler/include/profiler/profile_conv_fwd_impl.hpp diff --git a/profiler/include/profile_convnd_bwd_data_impl.hpp b/profiler/include/profiler/profile_convnd_bwd_data_impl.hpp similarity index 100% rename from profiler/include/profile_convnd_bwd_data_impl.hpp rename to profiler/include/profiler/profile_convnd_bwd_data_impl.hpp diff --git a/profiler/include/profile_convnd_bwd_weight_impl.hpp b/profiler/include/profiler/profile_convnd_bwd_weight_impl.hpp similarity index 100% rename from profiler/include/profile_convnd_bwd_weight_impl.hpp rename to profiler/include/profiler/profile_convnd_bwd_weight_impl.hpp diff --git a/profiler/include/profile_elementwise_layernorm_impl.hpp b/profiler/include/profiler/profile_elementwise_layernorm_impl.hpp similarity index 100% rename from profiler/include/profile_elementwise_layernorm_impl.hpp rename to profiler/include/profiler/profile_elementwise_layernorm_impl.hpp diff --git a/profiler/include/profile_gemm_add_add_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp similarity index 100% rename from profiler/include/profile_gemm_add_add_fastgelu_impl.hpp rename to profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp diff --git a/profiler/include/profile_gemm_bias_add_reduce_impl.hpp b/profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp similarity index 100% rename from profiler/include/profile_gemm_bias_add_reduce_impl.hpp rename to profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp diff --git a/profiler/include/profile_gemm_bilinear_impl.hpp b/profiler/include/profiler/profile_gemm_bilinear_impl.hpp similarity index 100% rename from profiler/include/profile_gemm_bilinear_impl.hpp rename to profiler/include/profiler/profile_gemm_bilinear_impl.hpp diff --git a/profiler/include/profile_gemm_impl.hpp b/profiler/include/profiler/profile_gemm_impl.hpp similarity index 100% rename from profiler/include/profile_gemm_impl.hpp rename to profiler/include/profiler/profile_gemm_impl.hpp diff --git a/profiler/include/profile_gemm_reduce_impl.hpp b/profiler/include/profiler/profile_gemm_reduce_impl.hpp similarity index 100% rename from profiler/include/profile_gemm_reduce_impl.hpp rename to profiler/include/profiler/profile_gemm_reduce_impl.hpp diff --git a/profiler/include/profile_gemm_splitk_impl.hpp b/profiler/include/profiler/profile_gemm_splitk_impl.hpp similarity index 100% rename from profiler/include/profile_gemm_splitk_impl.hpp rename to profiler/include/profiler/profile_gemm_splitk_impl.hpp diff --git a/profiler/include/profile_grouped_conv_bwd_weight_impl.hpp b/profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp similarity index 100% rename from profiler/include/profile_grouped_conv_bwd_weight_impl.hpp rename to profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp diff --git a/profiler/include/profile_grouped_conv_fwd_impl.hpp b/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp similarity index 100% rename from profiler/include/profile_grouped_conv_fwd_impl.hpp rename to profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp diff --git a/profiler/include/profile_grouped_gemm_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_impl.hpp similarity index 100% rename from profiler/include/profile_grouped_gemm_impl.hpp rename to profiler/include/profiler/profile_grouped_gemm_impl.hpp diff --git a/profiler/include/profile_groupnorm_impl.hpp b/profiler/include/profiler/profile_groupnorm_impl.hpp similarity index 100% rename from profiler/include/profile_groupnorm_impl.hpp rename to profiler/include/profiler/profile_groupnorm_impl.hpp diff --git a/profiler/include/profile_layernorm_impl.hpp b/profiler/include/profiler/profile_layernorm_impl.hpp similarity index 100% rename from profiler/include/profile_layernorm_impl.hpp rename to profiler/include/profiler/profile_layernorm_impl.hpp diff --git a/profiler/include/profile_reduce_impl.hpp b/profiler/include/profiler/profile_reduce_impl.hpp similarity index 100% rename from profiler/include/profile_reduce_impl.hpp rename to profiler/include/profiler/profile_reduce_impl.hpp diff --git a/profiler/include/profile_softmax_impl.hpp b/profiler/include/profiler/profile_softmax_impl.hpp similarity index 100% rename from profiler/include/profile_softmax_impl.hpp rename to profiler/include/profiler/profile_softmax_impl.hpp diff --git a/profiler/src/CMakeLists.txt b/profiler/src/CMakeLists.txt new file mode 100644 index 0000000000..51d039526f --- /dev/null +++ b/profiler/src/CMakeLists.txt @@ -0,0 +1,63 @@ +# ckProfiler +set(PROFILER_SOURCES + profiler.cpp + profile_gemm.cpp + profile_gemm_splitk.cpp + profile_gemm_bilinear.cpp + profile_gemm_bias_add_reduce.cpp + profile_gemm_add_add_fastgelu.cpp + profile_gemm_reduce.cpp + profile_batched_gemm.cpp + profile_batched_gemm_gemm.cpp + profile_batched_gemm_add_relu_gemm_add.cpp + profile_batched_gemm_reduce.cpp + profile_grouped_gemm.cpp + profile_conv_fwd.cpp + profile_conv_fwd_bias_relu.cpp + profile_conv_fwd_bias_relu_add.cpp + profile_conv_bwd_data.cpp + profile_grouped_conv_fwd.cpp + profile_grouped_conv_bwd_weight.cpp + profile_reduce.cpp + profile_groupnorm.cpp + profile_layernorm.cpp + profile_softmax.cpp + profile_batchnorm_fwd.cpp + profile_batchnorm_bwd.cpp +) + +set(PROFILER_EXECUTABLE ckProfiler) + +add_executable(${PROFILER_EXECUTABLE} ${PROFILER_SOURCES}) +target_compile_options(${PROFILER_EXECUTABLE} PRIVATE -Wno-global-constructors) + +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_splitk_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_bilinear_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_add_fastgelu_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_reduce_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_bias_add_reduce_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_gemm_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_add_relu_gemm_add_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_reduce_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_gemm_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv1d_fwd_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_fwd_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_fwd_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv1d_bwd_data_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_bwd_data_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv3d_bwd_data_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv1d_bwd_weight_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_bwd_weight_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_bwd_weight_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_add_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_normalization_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_softmax_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_reduce_instance) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batchnorm_instance) + +rocm_install(TARGETS ${PROFILER_EXECUTABLE} COMPONENT profiler) diff --git a/profiler/src/profile_batched_gemm.cpp b/profiler/src/profile_batched_gemm.cpp index 7c4e2f7b7d..907a373794 100644 --- a/profiler/src/profile_batched_gemm.cpp +++ b/profiler/src/profile_batched_gemm.cpp @@ -7,7 +7,8 @@ #include #include -#include "profiler/include/profile_batched_gemm_impl.hpp" +#include "profiler/profile_batched_gemm_impl.hpp" +#include "profiler_operation_registry.hpp" enum struct GemmMatrixLayout { @@ -25,12 +26,15 @@ enum struct GemmDataType INT8_INT8_INT8, // 3 }; +#define OP_NAME "batched_gemm" +#define OP_DESC "Batched GEMM" + int profile_batched_gemm(int argc, char* argv[]) { if(argc != 18) { // clang-format off - printf("arg1: tensor operation (batched_gemm: Batched GEMM)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (0: fp32; 1: fp16, 2: bf16, 3: int8)\n"); printf("arg3: matrix layout (0: A[g, m, k] * B[g, k, n] = C[g, m, n];\n"); printf(" 1: A[g, m, k] * B[g, n, k] = C[g, m, n];\n"); @@ -195,3 +199,5 @@ int profile_batched_gemm(int argc, char* argv[]) return 1; } } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_batched_gemm); diff --git a/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp b/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp index 1aca388715..f440a3094e 100644 --- a/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp +++ b/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp @@ -6,7 +6,8 @@ #include #include -#include "profiler/include/profile_batched_gemm_add_relu_gemm_add_impl.hpp" +#include "profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp" +#include "profiler_operation_registry.hpp" using F16 = ck::half_t; using F32 = float; @@ -14,6 +15,9 @@ using F32 = float; using Row = ck::tensor_layout::gemm::RowMajor; using Col = ck::tensor_layout::gemm::ColumnMajor; +#define OP_NAME "batched_gemm_add_relu_gemm_add" +#define OP_DESC "Batched GEMM+Add+Relu+GEMM+Add" + int profile_batched_gemm_add_relu_gemm_add(int argc, char* argv[]) { enum struct GemmMatrixLayout @@ -109,8 +113,7 @@ int profile_batched_gemm_add_relu_gemm_add(int argc, char* argv[]) } else { - printf("arg1: tensor operation (batched_gemm_add_relu_gemm_add: " - "Batched_GEMM+Add+Relu+Gemm+Add)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (1: fp16)\n"); printf("arg3: matrix layout (0: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[n, o] + D1[m, o] " "= E1[m, o]; 1: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[o, n] + D1[m, o] = " @@ -207,3 +210,5 @@ int profile_batched_gemm_add_relu_gemm_add(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_batched_gemm_add_relu_gemm_add); diff --git a/profiler/src/profile_batched_gemm_gemm.cpp b/profiler/src/profile_batched_gemm_gemm.cpp index a28c494a0e..6015c93be3 100644 --- a/profiler/src/profile_batched_gemm_gemm.cpp +++ b/profiler/src/profile_batched_gemm_gemm.cpp @@ -6,7 +6,8 @@ #include #include -#include "profiler/include/profile_batched_gemm_gemm_impl.hpp" +#include "profiler/profile_batched_gemm_gemm_impl.hpp" +#include "profiler_operation_registry.hpp" using F16 = ck::half_t; using F32 = float; @@ -14,6 +15,9 @@ using F32 = float; using Row = ck::tensor_layout::gemm::RowMajor; using Col = ck::tensor_layout::gemm::ColumnMajor; +#define OP_NAME "batched_gemm_gemm" +#define OP_DESC "Batched GEMM+GEMM" + int profile_batched_gemm_gemm(int argc, char* argv[]) { enum struct GemmMatrixLayout @@ -101,7 +105,7 @@ int profile_batched_gemm_gemm(int argc, char* argv[]) } else { - printf("arg1: tensor operation (batched_gemm_gemm: Batched_GEMM+Gemm)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (1: fp16)\n"); printf("arg3: matrix layout (0: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[n, o] + D1[m, o] " "= E1[m, o]; 1: Relu(A0[m, k] * B0[n, k] + D0[m, n]) * B1[o, n] + D1[m, o] = E1[m, " @@ -179,3 +183,5 @@ int profile_batched_gemm_gemm(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_batched_gemm_gemm); diff --git a/profiler/src/profile_batched_gemm_reduce.cpp b/profiler/src/profile_batched_gemm_reduce.cpp index d734b5d87b..6b1dfc0142 100644 --- a/profiler/src/profile_batched_gemm_reduce.cpp +++ b/profiler/src/profile_batched_gemm_reduce.cpp @@ -6,7 +6,11 @@ #include #include -#include "profiler/include/profile_batched_gemm_reduce_impl.hpp" +#include "profiler/profile_batched_gemm_reduce_impl.hpp" +#include "profiler_operation_registry.hpp" + +#define OP_NAME "batched_gemm_reduce" +#define OP_DESC "Batched GEMM+Reduce" int profile_batched_gemm_reduce(int argc, char* argv[]) { @@ -26,7 +30,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[]) if(argc != 15) { - printf("arg1: tensor operation (batched_gemm_reduce: BatchedGEMM+Reduce)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (0: fp32; 1: fp16)\n"); printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n"); printf(" 1: A[m, k] * B[n, k] = C[m, n];\n"); @@ -151,3 +155,5 @@ int profile_batched_gemm_reduce(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_batched_gemm_reduce); diff --git a/profiler/src/profile_batchnorm_bwd.cpp b/profiler/src/profile_batchnorm_bwd.cpp index d5938a1e6b..44ce7350ff 100644 --- a/profiler/src/profile_batchnorm_bwd.cpp +++ b/profiler/src/profile_batchnorm_bwd.cpp @@ -6,7 +6,8 @@ #include #include "ck/library/utility/host_common_util.hpp" -#include "profiler/include/profile_batchnorm_backward_impl.hpp" +#include "profiler/profile_batchnorm_backward_impl.hpp" +#include "profiler_operation_registry.hpp" using ck::index_t; @@ -202,3 +203,5 @@ int profile_batchnorm_backward(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION("bnorm_bwd", "Batchnorm backward", profile_batchnorm_backward); diff --git a/profiler/src/profile_batchnorm_fwd.cpp b/profiler/src/profile_batchnorm_fwd.cpp index db443e5d7b..902a1fc423 100644 --- a/profiler/src/profile_batchnorm_fwd.cpp +++ b/profiler/src/profile_batchnorm_fwd.cpp @@ -6,7 +6,8 @@ #include #include "ck/library/utility/host_common_util.hpp" -#include "profiler/include/profile_batchnorm_forward_impl.hpp" +#include "profiler/profile_batchnorm_forward_impl.hpp" +#include "profiler_operation_registry.hpp" using ck::index_t; @@ -214,3 +215,5 @@ int profile_batchnorm_forward(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION("bnorm_fwd", "Batchnorm forward", profile_batchnorm_forward); diff --git a/profiler/src/profile_conv_bwd_data.cpp b/profiler/src/profile_conv_bwd_data.cpp index cf42afd2aa..9241ead738 100644 --- a/profiler/src/profile_conv_bwd_data.cpp +++ b/profiler/src/profile_conv_bwd_data.cpp @@ -6,7 +6,8 @@ #include #include -#include "profiler/include/profile_conv_bwd_data_impl.hpp" +#include "profiler/profile_conv_bwd_data_impl.hpp" +#include "profiler_operation_registry.hpp" namespace { @@ -24,10 +25,13 @@ enum struct ConvDataType INT8_INT8_INT8, // 3 }; +#define OP_NAME "conv_bwd_data" +#define OP_DESC "Convolution Backward Data" + static void print_helper_msg() { std::cout - << "arg1: tensor operation (conv_bwd_data: Convolution Backward Data)\n" + << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n" << "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n" << " 1: Input fp16, Weight fp16, Output fp16\n" << " 2: Input bf16, Weight bf16, Output bf16\n" @@ -182,3 +186,5 @@ int profile_conv_bwd_data(int argc, char* argv[]) return 1; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_conv_bwd_data); diff --git a/profiler/src/profile_conv_fwd.cpp b/profiler/src/profile_conv_fwd.cpp index 72b6a6b629..b57ee7fd94 100644 --- a/profiler/src/profile_conv_fwd.cpp +++ b/profiler/src/profile_conv_fwd.cpp @@ -6,7 +6,8 @@ #include #include -#include "profiler/include/profile_conv_fwd_impl.hpp" +#include "profiler/profile_conv_fwd_impl.hpp" +#include "profiler_operation_registry.hpp" namespace { @@ -24,11 +25,14 @@ enum struct ConvDataType INT8_INT8_INT8, // 3 }; +#define OP_NAME "conv_fwd" +#define OP_DESC "Convolution Forward" + static void print_helper_msg() { std::cout // clang-format-off - << "arg1: tensor operation (conv_fwd: Convolution Forward)\n" + << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n" << "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n" << " 1: Input fp16, Weight fp16, Output fp16\n" << " 2: Input bf16, Weight bf16, Output bf16\n" @@ -184,3 +188,5 @@ int profile_conv_fwd(int argc, char* argv[]) return 1; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_conv_fwd); diff --git a/profiler/src/profile_conv_fwd_bias_relu.cpp b/profiler/src/profile_conv_fwd_bias_relu.cpp index 91f4836a2b..b44007cde4 100644 --- a/profiler/src/profile_conv_fwd_bias_relu.cpp +++ b/profiler/src/profile_conv_fwd_bias_relu.cpp @@ -6,7 +6,8 @@ #include #include -#include "profiler/include/profile_conv_fwd_bias_relu_impl.hpp" +#include "profiler/profile_conv_fwd_bias_relu_impl.hpp" +#include "profiler_operation_registry.hpp" enum struct ConvDataType { @@ -32,11 +33,14 @@ enum struct ConvOutputLayout NHWK, // 1 }; +#define OP_NAME "conv_fwd_bias_relu" +#define OP_DESC "Convolution Forward+Bias+ReLU" + int profile_conv_fwd_bias_relu(int argc, char* argv[]) { if(argc != 25) { - printf("arg1: tensor operation (conv_fwd_bias_relu: ForwardConvolution+Bias+ReLu)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (0: fp32; 1: fp16)\n"); printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n"); printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n"); @@ -114,3 +118,5 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_conv_fwd_bias_relu); diff --git a/profiler/src/profile_conv_fwd_bias_relu_add.cpp b/profiler/src/profile_conv_fwd_bias_relu_add.cpp index 5cc6faba34..408dd02f78 100644 --- a/profiler/src/profile_conv_fwd_bias_relu_add.cpp +++ b/profiler/src/profile_conv_fwd_bias_relu_add.cpp @@ -6,7 +6,8 @@ #include #include -#include "profiler/include/profile_conv_fwd_bias_relu_add_impl.hpp" +#include "profiler/profile_conv_fwd_bias_relu_add_impl.hpp" +#include "profiler_operation_registry.hpp" enum struct ConvDataType { @@ -32,12 +33,14 @@ enum struct ConvOutputLayout NHWK, // 1 }; +#define OP_NAME "conv_fwd_bias_relu_add" +#define OP_DESC "Convolution Forward+Bias+ReLU+Add" + int profile_conv_fwd_bias_relu_add(int argc, char* argv[]) { if(argc != 25) { - printf( - "arg1: tensor operation (conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLu+Add)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (0: fp32; 1: fp16)\n"); printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n"); printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n"); @@ -115,3 +118,5 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_conv_fwd_bias_relu_add); diff --git a/profiler/src/profile_gemm.cpp b/profiler/src/profile_gemm.cpp index 70219c4c8c..61bae6ae70 100644 --- a/profiler/src/profile_gemm.cpp +++ b/profiler/src/profile_gemm.cpp @@ -6,7 +6,8 @@ #include #include -#include "profiler/include/profile_gemm_impl.hpp" +#include "profiler/profile_gemm_impl.hpp" +#include "profiler_operation_registry.hpp" enum struct GemmMatrixLayout { @@ -24,9 +25,12 @@ enum struct GemmDataType INT8_INT8_INT8, // 3 }; +#define OP_NAME "gemm" +#define OP_DESC "GEMM" + static void print_helper_msg() { - std::cout << "arg1: tensor operation (gemm: GEMM)\n" + std::cout << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n" << "arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n" << "arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n" << " 1: A[m, k] * B[n, k] = C[m, n];\n" @@ -184,3 +188,5 @@ int profile_gemm(int argc, char* argv[]) return 1; } } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm); diff --git a/profiler/src/profile_gemm_add_add_fastgelu.cpp b/profiler/src/profile_gemm_add_add_fastgelu.cpp index 8d3d280d7b..c3c0fb7b67 100644 --- a/profiler/src/profile_gemm_add_add_fastgelu.cpp +++ b/profiler/src/profile_gemm_add_add_fastgelu.cpp @@ -6,7 +6,11 @@ #include #include -#include "profiler/include/profile_gemm_add_add_fastgelu_impl.hpp" +#include "profiler/profile_gemm_add_add_fastgelu_impl.hpp" +#include "profiler_operation_registry.hpp" + +#define OP_NAME "gemm_add_add_fastgelu" +#define OP_DESC "GEMM+Add+Add+FastGeLU" int profile_gemm_add_add_fastgelu(int argc, char* argv[]) { @@ -29,7 +33,7 @@ int profile_gemm_add_add_fastgelu(int argc, char* argv[]) if(argc != 16) { // clang-format off - printf("arg1: tensor operation (gemm_add_add_fastgelu: GEMM+Add+Add+FastGeLU)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n"); printf("arg3: matrix layout (0: E[m, n] = FastGeLU(A[m, k] * B[k, n] + D0[m, n] + D1[m, n]);\n"); printf(" 1: E[m, n] = FastGeLU(A[m, k] * B[n, k] + D0[m, n] + D1[m, n]);\n"); @@ -150,3 +154,5 @@ int profile_gemm_add_add_fastgelu(int argc, char* argv[]) return 1; } } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_add_add_fastgelu); diff --git a/profiler/src/profile_gemm_bias_add_reduce.cpp b/profiler/src/profile_gemm_bias_add_reduce.cpp index bc2675703f..6d86db0822 100644 --- a/profiler/src/profile_gemm_bias_add_reduce.cpp +++ b/profiler/src/profile_gemm_bias_add_reduce.cpp @@ -6,7 +6,11 @@ #include #include -#include "profiler/include/profile_gemm_bias_add_reduce_impl.hpp" +#include "profiler/profile_gemm_bias_add_reduce_impl.hpp" +#include "profiler_operation_registry.hpp" + +#define OP_NAME "gemm_bias_add_reduce" +#define OP_DESC "GEMM+Bias+Add+Reduce" int profile_gemm_bias_add_reduce(int argc, char* argv[]) { @@ -26,7 +30,7 @@ int profile_gemm_bias_add_reduce(int argc, char* argv[]) if(!(argc == 14 || argc == 15)) { - printf("arg1: tensor operation (gemm: GEMM+bias+add+Reduce)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (0: fp32; 1: fp16)\n"); printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n"); printf(" 1: A[m, k] * B[n, k] = C[m, n];\n"); @@ -159,3 +163,5 @@ int profile_gemm_bias_add_reduce(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_bias_add_reduce); diff --git a/profiler/src/profile_gemm_bilinear.cpp b/profiler/src/profile_gemm_bilinear.cpp index 4f7e5a800d..3480014ba6 100644 --- a/profiler/src/profile_gemm_bilinear.cpp +++ b/profiler/src/profile_gemm_bilinear.cpp @@ -6,7 +6,11 @@ #include #include -#include "profiler/include/profile_gemm_bilinear_impl.hpp" +#include "profiler/profile_gemm_bilinear_impl.hpp" +#include "profiler_operation_registry.hpp" + +#define OP_NAME "gemm_bilinear" +#define OP_DESC "GEMM+Bilinear" int profile_gemm_bilinear(int argc, char* argv[]) { @@ -29,7 +33,7 @@ int profile_gemm_bilinear(int argc, char* argv[]) if(argc != 17) { // clang-format off - printf("arg1: tensor operation (gemm_bilinear: GEMM+Bilinear)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n"); printf("arg3: matrix layout (0: E[m, n] = alpha * A[m, k] * B[k, n] + beta * D[m, n];\n"); printf(" 1: E[m, n] = alpha * A[m, k] * B[n, k] + beta * D[m, n];\n"); @@ -144,3 +148,5 @@ int profile_gemm_bilinear(int argc, char* argv[]) return 1; } } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_bilinear); diff --git a/profiler/src/profile_gemm_reduce.cpp b/profiler/src/profile_gemm_reduce.cpp index 476943c8a7..395bf0627e 100644 --- a/profiler/src/profile_gemm_reduce.cpp +++ b/profiler/src/profile_gemm_reduce.cpp @@ -6,7 +6,11 @@ #include #include -#include "profiler/include/profile_gemm_reduce_impl.hpp" +#include "profiler/profile_gemm_reduce_impl.hpp" +#include "profiler_operation_registry.hpp" + +#define OP_NAME "gemm_reduce" +#define OP_DESC "GEMM+Reduce" int profile_gemm_reduce(int argc, char* argv[]) { @@ -26,7 +30,7 @@ int profile_gemm_reduce(int argc, char* argv[]) if(!(argc == 14 || argc == 15)) { - printf("arg1: tensor operation (gemm: GEMM+Reduce)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (0: fp32; 1: fp16)\n"); printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n"); printf(" 1: A[m, k] * B[n, k] = C[m, n];\n"); @@ -146,3 +150,5 @@ int profile_gemm_reduce(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_reduce); diff --git a/profiler/src/profile_gemm_splitk.cpp b/profiler/src/profile_gemm_splitk.cpp index fff023c8e0..f636ce718c 100644 --- a/profiler/src/profile_gemm_splitk.cpp +++ b/profiler/src/profile_gemm_splitk.cpp @@ -6,7 +6,8 @@ #include #include -#include "profiler/include/profile_gemm_splitk_impl.hpp" +#include "profiler/profile_gemm_splitk_impl.hpp" +#include "profiler_operation_registry.hpp" enum struct GemmMatrixLayout { @@ -24,11 +25,14 @@ enum struct GemmDataType INT8_INT8_INT8, // 3 }; +#define OP_NAME "gemm_splitk" +#define OP_DESC "Split-K GEMM" + int profile_gemm_splitk(int argc, char* argv[]) { if(argc != 15) { - printf("arg1: tensor operation (gemm_splitk: Split-K GEMM)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n"); printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n"); printf(" 1: A[m, k] * B[n, k] = C[m, n];\n"); @@ -146,3 +150,5 @@ int profile_gemm_splitk(int argc, char* argv[]) return 1; } } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_splitk); diff --git a/profiler/src/profile_grouped_conv_bwd_weight.cpp b/profiler/src/profile_grouped_conv_bwd_weight.cpp index deb5741cef..dfd8a099f5 100644 --- a/profiler/src/profile_grouped_conv_bwd_weight.cpp +++ b/profiler/src/profile_grouped_conv_bwd_weight.cpp @@ -6,7 +6,8 @@ #include #include -#include "profiler/include/profile_grouped_conv_bwd_weight_impl.hpp" +#include "profiler/profile_grouped_conv_bwd_weight_impl.hpp" +#include "profiler_operation_registry.hpp" namespace { @@ -23,9 +24,12 @@ enum struct ConvDataType BF16_F32_BF16, // 2 }; +#define OP_NAME "grouped_conv_bwd_weight" +#define OP_DESC "Grouped Convolution Backward Weight" + static void print_helper_msg() { - std::cout << "arg1: tensor operation (conv_bwd_weight: Convolution Backward Weight\n" + std::cout << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n" << "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n" << " 1: Input fp16, Weight fp16, Output fp16\n" << " 2: Input bf16, Weight fp32, Output bf16)\n" @@ -174,3 +178,5 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[]) return 1; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_grouped_conv_bwd_weight); diff --git a/profiler/src/profile_grouped_conv_fwd.cpp b/profiler/src/profile_grouped_conv_fwd.cpp index cb7c69b473..9ff3c15af0 100644 --- a/profiler/src/profile_grouped_conv_fwd.cpp +++ b/profiler/src/profile_grouped_conv_fwd.cpp @@ -6,7 +6,8 @@ #include #include -#include "profiler/include/profile_grouped_conv_fwd_impl.hpp" +#include "profiler/profile_grouped_conv_fwd_impl.hpp" +#include "profiler_operation_registry.hpp" namespace { @@ -24,11 +25,14 @@ enum struct ConvDataType INT8_INT8_INT8, // 3 }; +#define OP_NAME "grouped_conv_fwd" +#define OP_DESC "Grouped Convolution Forward" + static void print_helper_msg() { std::cout // clang-format off - << "arg1: tensor operation (grouped_conv_fwd: Grouped Convolution Forward)\n" + << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n" << "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n" << " 1: Input fp16, Weight fp16, Output fp16\n" << " 2: Input bf16, Weight bf16, Output bf16\n" @@ -252,3 +256,5 @@ int profile_grouped_conv_fwd(int argc, char* argv[]) return 1; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_grouped_conv_fwd); diff --git a/profiler/src/profile_grouped_gemm.cpp b/profiler/src/profile_grouped_gemm.cpp index 1e24c6091b..65e24bd9cc 100644 --- a/profiler/src/profile_grouped_gemm.cpp +++ b/profiler/src/profile_grouped_gemm.cpp @@ -6,7 +6,8 @@ #include #include -#include "profiler/include/profile_grouped_gemm_impl.hpp" +#include "profiler/profile_grouped_gemm_impl.hpp" +#include "profiler_operation_registry.hpp" enum struct GemmMatrixLayout { @@ -44,11 +45,14 @@ std::vector argToIntArray(char* input) return out; } +#define OP_NAME "grouped_gemm" +#define OP_DESC "Grouped GEMM" + int profile_grouped_gemm(int argc, char* argv[]) { if(!(argc == 14)) { - printf("arg1: tensor operation (grouped_gemm: Grouped GEMM)\n"); + printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n"); printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n"); printf(" 1: A[m, k] * B[n, k] = C[m, n];\n"); @@ -161,3 +165,5 @@ int profile_grouped_gemm(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_grouped_gemm); diff --git a/profiler/src/profile_groupnorm.cpp b/profiler/src/profile_groupnorm.cpp index 7eeaca7d45..2741f52717 100644 --- a/profiler/src/profile_groupnorm.cpp +++ b/profiler/src/profile_groupnorm.cpp @@ -5,8 +5,9 @@ #include #include -#include "profiler/include/data_type_enum.hpp" -#include "profiler/include/profile_groupnorm_impl.hpp" +#include "profiler/data_type_enum.hpp" +#include "profiler/profile_groupnorm_impl.hpp" +#include "profiler_operation_registry.hpp" using ck::index_t; @@ -43,9 +44,12 @@ struct GroupnormArgParser } }; +#define OP_NAME "groupnorm" +#define OP_DESC "Group Normalization" + void print_help_groupnorm() { - std::cout << "arg1: tensor operation (groupnorm: Group normalization)\n" + std::cout << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n" << "arg2: data type (0: fp16; 1: fp32)\n" << "arg3: verification (0: no; 1: yes)\n" << "arg4: initialization (0: no init; 1: integer value; 2: decimal value)\n" @@ -104,3 +108,5 @@ int profile_groupnorm(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_groupnorm); diff --git a/profiler/src/profile_layernorm.cpp b/profiler/src/profile_layernorm.cpp index b090a4e1c8..e93fc2dbd2 100644 --- a/profiler/src/profile_layernorm.cpp +++ b/profiler/src/profile_layernorm.cpp @@ -5,8 +5,9 @@ #include #include -#include "profiler/include/data_type_enum.hpp" -#include "profiler/include/profile_layernorm_impl.hpp" +#include "profiler/data_type_enum.hpp" +#include "profiler/profile_layernorm_impl.hpp" +#include "profiler_operation_registry.hpp" using ck::index_t; @@ -96,3 +97,5 @@ int profile_layernorm(int argc, char* argv[]) return 0; } + +REGISTER_PROFILER_OPERATION("layernorm", "Layer Normalization", profile_layernorm); diff --git a/profiler/src/profile_reduce.cpp b/profiler/src/profile_reduce.cpp index 1ec2a6d6e6..6925371858 100644 --- a/profiler/src/profile_reduce.cpp +++ b/profiler/src/profile_reduce.cpp @@ -13,8 +13,9 @@ #include "ck/library/utility/host_common_util.hpp" -#include "profiler/include/profile_reduce_impl.hpp" -#include "profiler/include/data_type_enum.hpp" +#include "profiler/profile_reduce_impl.hpp" +#include "profiler/data_type_enum.hpp" +#include "profiler_operation_registry.hpp" using namespace std; @@ -429,3 +430,5 @@ int profile_reduce(int argc, char* argv[]) return (0); }; + +REGISTER_PROFILER_OPERATION("reduce", "Reduce", profile_reduce); diff --git a/profiler/src/profile_softmax.cpp b/profiler/src/profile_softmax.cpp index 0cf4e2b5d5..30f627dd29 100644 --- a/profiler/src/profile_softmax.cpp +++ b/profiler/src/profile_softmax.cpp @@ -5,7 +5,8 @@ #include #include -#include "profiler/include/profile_softmax_impl.hpp" +#include "profiler/profile_softmax_impl.hpp" +#include "profiler_operation_registry.hpp" using ck::index_t; using ck::profiler::SoftmaxDataType; @@ -164,3 +165,5 @@ int profile_softmax(int argc, char* argv[]) // profile_normalization(argc, argv); // return 0; // } + +REGISTER_PROFILER_OPERATION("softmax", "Softmax", profile_softmax); diff --git a/profiler/src/profiler.cpp b/profiler/src/profiler.cpp index 34d0f5409f..080117e390 100644 --- a/profiler/src/profiler.cpp +++ b/profiler/src/profiler.cpp @@ -1,56 +1,14 @@ // SPDX-License-Identifier: MIT // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. -#include +#include +#include -int profile_gemm(int, char*[]); -int profile_gemm_splitk(int, char*[]); -int profile_gemm_bilinear(int, char*[]); -int profile_gemm_add_add_fastgelu(int, char*[]); -int profile_gemm_reduce(int, char*[]); -int profile_gemm_bias_add_reduce(int, char*[]); -int profile_batched_gemm(int, char*[]); -int profile_batched_gemm_gemm(int, char*[]); -int profile_batched_gemm_add_relu_gemm_add(int, char*[]); -int profile_batched_gemm_reduce(int, char*[]); -int profile_grouped_gemm(int, char*[]); -int profile_conv_fwd(int, char*[]); -int profile_conv_fwd_bias_relu(int, char*[]); -int profile_conv_fwd_bias_relu_add(int, char*[]); -int profile_conv_bwd_data(int, char*[]); -int profile_grouped_conv_fwd(int, char*[]); -int profile_grouped_conv_bwd_weight(int, char*[]); -int profile_softmax(int, char*[]); -int profile_layernorm(int, char*[]); -int profile_groupnorm(int, char*[]); -int profile_reduce(int, char*[]); -int profile_batchnorm_forward(int, char*[]); -int profile_batchnorm_backward(int, char*[]); +#include "profiler_operation_registry.hpp" static void print_helper_message() { - // clang-format off - printf("arg1: tensor operation (gemm: GEMM\n" - " gemm_splitk: Split-K GEMM\n" - " gemm_bilinear: GEMM+Bilinear\n" - " gemm_add_add_fastgelu: GEMM+Add+Add+FastGeLU\n" - " gemm_reduce: GEMM+Reduce\n" - " gemm_bias_add_reduce: GEMM+Bias+Add+Reduce\n" - " batched_gemm: Batched GEMM\n" - " batched_gemm_gemm: Batched+GEMM+GEMM\n" - " batched_gemm_add_relu_gemm_add: Batched+GEMM+bias+gelu+GEMM+bias\n" - " batched_gemm_reduce: Batched GEMM+Reduce\n" - " grouped_gemm: Grouped GEMM\n" - " conv_fwd: Convolution Forward\n" - " conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU\n" - " conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add\n" - " conv_bwd_data: Convolution Backward Data\n" - " grouped_conv_fwd: Grouped Convolution Forward\n" - " grouped_conv_bwd_weight: Grouped Convolution Backward Weight\n" - " softmax: Softmax\n" - " reduce: Reduce\n" - " bnorm_fwd: Batchnorm forward\n"); - // clang-format on + std::cout << "arg1: tensor operation " << ProfilerOperationRegistry::GetInstance() << std::endl; } int main(int argc, char* argv[]) @@ -58,105 +16,15 @@ int main(int argc, char* argv[]) if(argc == 1) { print_helper_message(); - - return 0; } - else if(strcmp(argv[1], "gemm") == 0) + else if(const auto operation = ProfilerOperationRegistry::GetInstance().Get(argv[1]); + operation.has_value()) { - return profile_gemm(argc, argv); - } - else if(strcmp(argv[1], "gemm_splitk") == 0) - { - return profile_gemm_splitk(argc, argv); - } - else if(strcmp(argv[1], "gemm_bilinear") == 0) - { - return profile_gemm_bilinear(argc, argv); - } - else if(strcmp(argv[1], "gemm_add_add_fastgelu") == 0) - { - return profile_gemm_add_add_fastgelu(argc, argv); - } - else if(strcmp(argv[1], "gemm_reduce") == 0) - { - return profile_gemm_reduce(argc, argv); - } - else if(strcmp(argv[1], "gemm_bias_add_reduce") == 0) - { - return profile_gemm_bias_add_reduce(argc, argv); - } - else if(strcmp(argv[1], "batched_gemm") == 0) - { - return profile_batched_gemm(argc, argv); - } - else if(strcmp(argv[1], "batched_gemm_gemm") == 0) - { - return profile_batched_gemm_gemm(argc, argv); - } - else if(strcmp(argv[1], "batched_gemm_add_relu_gemm_add") == 0) - { - return profile_batched_gemm_add_relu_gemm_add(argc, argv); - } - else if(strcmp(argv[1], "batched_gemm_reduce") == 0) - { - return profile_batched_gemm_reduce(argc, argv); - } - else if(strcmp(argv[1], "grouped_gemm") == 0) - { - return profile_grouped_gemm(argc, argv); - } - else if(strcmp(argv[1], "conv_fwd") == 0) - { - return profile_conv_fwd(argc, argv); - } - else if(strcmp(argv[1], "conv_fwd_bias_relu") == 0) - { - return profile_conv_fwd_bias_relu(argc, argv); - } - else if(strcmp(argv[1], "conv_fwd_bias_relu_add") == 0) - { - return profile_conv_fwd_bias_relu_add(argc, argv); - } - else if(strcmp(argv[1], "conv_bwd_data") == 0) - { - return profile_conv_bwd_data(argc, argv); - } - else if(strcmp(argv[1], "grouped_conv_fwd") == 0) - { - return profile_grouped_conv_fwd(argc, argv); - } - else if(strcmp(argv[1], "conv_bwd_weight") == 0) - { - return profile_grouped_conv_bwd_weight(argc, argv); - } - else if(strcmp(argv[1], "reduce") == 0) - { - return profile_reduce(argc, argv); - } - else if(strcmp(argv[1], "softmax") == 0) - { - return profile_softmax(argc, argv); - } - else if(strcmp(argv[1], "layernorm") == 0) - { - return profile_layernorm(argc, argv); - } - else if(strcmp(argv[1], "groupnorm") == 0) - { - return profile_groupnorm(argc, argv); - } - else if(strcmp(argv[1], "bnorm_fwd") == 0) - { - return profile_batchnorm_forward(argc, argv); - } - else if(strcmp(argv[1], "bnorm_bwd") == 0) - { - return profile_batchnorm_backward(argc, argv); + return (*operation)(argc, argv); } else { - print_helper_message(); - - return 0; + std::cerr << "cannot find operation: " << argv[1] << std::endl; + return EXIT_FAILURE; } } diff --git a/profiler/src/profiler_operation_registry.hpp b/profiler/src/profiler_operation_registry.hpp new file mode 100644 index 0000000000..91ff291233 --- /dev/null +++ b/profiler/src/profiler_operation_registry.hpp @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. + +#include +#include +#include +#include +#include +#include +#include + +class ProfilerOperationRegistry final +{ + ProfilerOperationRegistry() = default; + ~ProfilerOperationRegistry() = default; + + public: + using Operation = std::function; + + private: + struct Entry final + { + explicit Entry(std::string_view description, Operation operation) noexcept + : description_(description), operation_(std::move(operation)) + { + } + + std::string_view description_; + Operation operation_; + }; + + std::map entries_; + + friend std::ostream& operator<<(std::ostream& stream, const ProfilerOperationRegistry& registry) + { + stream << "{\n"; + for(auto& [name, entry] : registry.entries_) + { + stream << "\t" << name << ": " << entry.description_ << "\n"; + } + stream << "}"; + + return stream; + } + + public: + static ProfilerOperationRegistry& GetInstance() + { + static ProfilerOperationRegistry registry; + return registry; + } + + std::optional Get(std::string_view name) const + { + const auto found = entries_.find(name); + if(found == end(entries_)) + { + return std::nullopt; + } + + return (found->second).operation_; + } + + bool Add(std::string_view name, std::string_view description, Operation operation) + { + return entries_ + .emplace(std::piecewise_construct, + std::forward_as_tuple(name), + std::forward_as_tuple(description, std::move(operation))) + .second; + } +}; + +#define PP_CONCAT(x, y) PP_CONCAT_IMPL(x, y) +#define PP_CONCAT_IMPL(x, y) x##y + +#define REGISTER_PROFILER_OPERATION(name, description, operation) \ + static const bool PP_CONCAT(operation_registration_result_, __COUNTER__) = \ + ::ProfilerOperationRegistry::GetInstance().Add(name, description, operation) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a3d2bcdc82..a8347d9e38 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,5 +1,6 @@ include_directories(BEFORE ${PROJECT_SOURCE_DIR}/ + ${PROJECT_SOURCE_DIR}/profiler/include ) include(googletest) diff --git a/test/batched_gemm/batched_gemm_bf16.cpp b/test/batched_gemm/batched_gemm_bf16.cpp index 698e9faada..78be540627 100644 --- a/test/batched_gemm/batched_gemm_bf16.cpp +++ b/test/batched_gemm/batched_gemm_bf16.cpp @@ -3,7 +3,7 @@ #include -#include "profiler/include/profile_batched_gemm_impl.hpp" +#include "profiler/profile_batched_gemm_impl.hpp" namespace { using ADataType = ck::bhalf_t; diff --git a/test/batched_gemm/batched_gemm_fp16.cpp b/test/batched_gemm/batched_gemm_fp16.cpp index 7fc1f24f5f..6cbbedf677 100644 --- a/test/batched_gemm/batched_gemm_fp16.cpp +++ b/test/batched_gemm/batched_gemm_fp16.cpp @@ -3,7 +3,7 @@ #include -#include "profiler/include/profile_batched_gemm_impl.hpp" +#include "profiler/profile_batched_gemm_impl.hpp" namespace { using ADataType = ck::half_t; diff --git a/test/batched_gemm/batched_gemm_fp32.cpp b/test/batched_gemm/batched_gemm_fp32.cpp index 59072acc50..c9e565e264 100644 --- a/test/batched_gemm/batched_gemm_fp32.cpp +++ b/test/batched_gemm/batched_gemm_fp32.cpp @@ -3,7 +3,7 @@ #include -#include "profiler/include/profile_batched_gemm_impl.hpp" +#include "profiler/profile_batched_gemm_impl.hpp" namespace { using ADataType = float; diff --git a/test/batched_gemm/batched_gemm_int8.cpp b/test/batched_gemm/batched_gemm_int8.cpp index b68649ddf7..4da941a576 100644 --- a/test/batched_gemm/batched_gemm_int8.cpp +++ b/test/batched_gemm/batched_gemm_int8.cpp @@ -3,7 +3,7 @@ #include -#include "profiler/include/profile_batched_gemm_impl.hpp" +#include "profiler/profile_batched_gemm_impl.hpp" namespace { using ADataType = int8_t; diff --git a/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp b/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp index d7fbc37f01..53c4d37c44 100644 --- a/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp +++ b/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp @@ -6,7 +6,7 @@ #include #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp" -#include "profiler/include/profile_batched_gemm_gemm_impl.hpp" +#include "profiler/profile_batched_gemm_gemm_impl.hpp" using ck::tensor_operation::device::GemmSpecialization; diff --git a/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp b/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp index 456d21142f..b150ce50d1 100644 --- a/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp +++ b/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp @@ -3,7 +3,7 @@ #include -#include "profiler/include/profile_batched_gemm_reduce_impl.hpp" +#include "profiler/profile_batched_gemm_reduce_impl.hpp" int main() { diff --git a/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp b/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp index e9fd514cce..98debe19c3 100644 --- a/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp +++ b/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp @@ -6,7 +6,7 @@ #include #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp" -#include "profiler/include/profile_batched_gemm_softmax_gemm_impl.hpp" +#include "profiler/profile_batched_gemm_softmax_gemm_impl.hpp" using ck::tensor_operation::device::GemmSpecialization; template diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp index 138b9f8ffc..912bbc91ed 100644 --- a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp +++ b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp @@ -7,7 +7,7 @@ #include "ck/ck.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp" -#include "profiler/include/profile_batched_gemm_softmax_gemm_permute_impl.hpp" +#include "profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp" using ck::tensor_operation::device::GemmSpecialization; using ck::tensor_operation::device::MaskingSpecialization; diff --git a/test/batchnorm/batchnorm_bwd_rank_4.cpp b/test/batchnorm/batchnorm_bwd_rank_4.cpp index 77590626dc..caa7331ea2 100644 --- a/test/batchnorm/batchnorm_bwd_rank_4.cpp +++ b/test/batchnorm/batchnorm_bwd_rank_4.cpp @@ -8,7 +8,7 @@ #include #include -#include "profiler/include/profile_batchnorm_backward_impl.hpp" +#include "profiler/profile_batchnorm_backward_impl.hpp" using F16 = ck::half_t; using F32 = float; diff --git a/test/batchnorm/batchnorm_fwd_rank_4.cpp b/test/batchnorm/batchnorm_fwd_rank_4.cpp index bc820be462..13aef7d6bf 100644 --- a/test/batchnorm/batchnorm_fwd_rank_4.cpp +++ b/test/batchnorm/batchnorm_fwd_rank_4.cpp @@ -8,7 +8,7 @@ #include #include -#include "profiler/include/profile_batchnorm_forward_impl.hpp" +#include "profiler/profile_batchnorm_forward_impl.hpp" using F16 = ck::half_t; using F32 = float; diff --git a/test/convnd_bwd_data/convnd_bwd_data.cpp b/test/convnd_bwd_data/convnd_bwd_data.cpp index c31e399ef6..70231d42ae 100644 --- a/test/convnd_bwd_data/convnd_bwd_data.cpp +++ b/test/convnd_bwd_data/convnd_bwd_data.cpp @@ -8,7 +8,7 @@ #include #include -#include "profiler/include/profile_conv_bwd_data_impl.hpp" +#include "profiler/profile_conv_bwd_data_impl.hpp" template class TestConvndBwdData : public ::testing::Test diff --git a/test/convnd_fwd/convnd_fwd.cpp b/test/convnd_fwd/convnd_fwd.cpp index 7a9782ebc0..a1921a9bfb 100644 --- a/test/convnd_fwd/convnd_fwd.cpp +++ b/test/convnd_fwd/convnd_fwd.cpp @@ -8,7 +8,7 @@ #include #include -#include "profiler/include/profile_conv_fwd_impl.hpp" +#include "profiler/profile_conv_fwd_impl.hpp" template class TestConvndFwd : public ::testing::Test diff --git a/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp b/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp index f01e963bdb..403881b3cc 100644 --- a/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp +++ b/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp @@ -2,7 +2,7 @@ // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. #include "gtest/gtest.h" -#include "profiler/include/profile_elementwise_layernorm_impl.hpp" +#include "profiler/profile_elementwise_layernorm_impl.hpp" using F16 = ck::half_t; using F32 = float; diff --git a/test/gemm_reduce/gemm_reduce_fp16.cpp b/test/gemm_reduce/gemm_reduce_fp16.cpp index 16f787e07e..029165ece1 100644 --- a/test/gemm_reduce/gemm_reduce_fp16.cpp +++ b/test/gemm_reduce/gemm_reduce_fp16.cpp @@ -3,7 +3,7 @@ #include -#include "profiler/include/profile_gemm_reduce_impl.hpp" +#include "profiler/profile_gemm_reduce_impl.hpp" int main() { diff --git a/test/grouped_convnd_bwd_weight/grouped_convnd_bwd_weight.cpp b/test/grouped_convnd_bwd_weight/grouped_convnd_bwd_weight.cpp index 1fc9c50d1e..e14173cb41 100644 --- a/test/grouped_convnd_bwd_weight/grouped_convnd_bwd_weight.cpp +++ b/test/grouped_convnd_bwd_weight/grouped_convnd_bwd_weight.cpp @@ -9,7 +9,7 @@ #include -#include "profiler/include/profile_grouped_conv_bwd_weight_impl.hpp" +#include "profiler/profile_grouped_conv_bwd_weight_impl.hpp" template class TestGroupedConvndBwdWeight : public ::testing::Test diff --git a/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp b/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp index fbd6e9972f..6df7f9969c 100644 --- a/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp +++ b/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp @@ -7,7 +7,7 @@ #include #include -#include "profiler/include/profile_grouped_conv_fwd_impl.hpp" +#include "profiler/profile_grouped_conv_fwd_impl.hpp" class TestGroupedConvNdFwd : public ::testing::Test { diff --git a/test/grouped_gemm/grouped_gemm_fp16.cpp b/test/grouped_gemm/grouped_gemm_fp16.cpp index f81875ab73..b3f7cca418 100644 --- a/test/grouped_gemm/grouped_gemm_fp16.cpp +++ b/test/grouped_gemm/grouped_gemm_fp16.cpp @@ -3,7 +3,7 @@ #include -#include "profiler/include/profile_grouped_gemm_impl.hpp" +#include "profiler/profile_grouped_gemm_impl.hpp" namespace { diff --git a/test/normalization/test_groupnorm_fp16.cpp b/test/normalization/test_groupnorm_fp16.cpp index 8f7438247c..636e522dce 100644 --- a/test/normalization/test_groupnorm_fp16.cpp +++ b/test/normalization/test_groupnorm_fp16.cpp @@ -2,7 +2,7 @@ // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. #include "gtest/gtest.h" -#include "profiler/include/profile_groupnorm_impl.hpp" +#include "profiler/profile_groupnorm_impl.hpp" using F16 = ck::half_t; using F32 = float; diff --git a/test/normalization/test_groupnorm_fp32.cpp b/test/normalization/test_groupnorm_fp32.cpp index 8dadbb60f8..ef492664bf 100644 --- a/test/normalization/test_groupnorm_fp32.cpp +++ b/test/normalization/test_groupnorm_fp32.cpp @@ -2,7 +2,7 @@ // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. #include "gtest/gtest.h" -#include "profiler/include/profile_groupnorm_impl.hpp" +#include "profiler/profile_groupnorm_impl.hpp" using F16 = ck::half_t; using F32 = float; diff --git a/test/normalization/test_layernorm2d_fp16.cpp b/test/normalization/test_layernorm2d_fp16.cpp index 7e3af7135e..eeb8ec150a 100644 --- a/test/normalization/test_layernorm2d_fp16.cpp +++ b/test/normalization/test_layernorm2d_fp16.cpp @@ -2,7 +2,7 @@ // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. #include "gtest/gtest.h" -#include "profiler/include/profile_layernorm_impl.hpp" +#include "profiler/profile_layernorm_impl.hpp" using F16 = ck::half_t; using F32 = float; diff --git a/test/normalization/test_layernorm2d_fp32.cpp b/test/normalization/test_layernorm2d_fp32.cpp index a7c4380d59..f555b42592 100644 --- a/test/normalization/test_layernorm2d_fp32.cpp +++ b/test/normalization/test_layernorm2d_fp32.cpp @@ -2,7 +2,7 @@ // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. #include "gtest/gtest.h" -#include "profiler/include/profile_layernorm_impl.hpp" +#include "profiler/profile_layernorm_impl.hpp" using F16 = ck::half_t; using F32 = float; diff --git a/test/reduce/reduce_no_index.cpp b/test/reduce/reduce_no_index.cpp index 475ebfd080..3f4d0676b4 100644 --- a/test/reduce/reduce_no_index.cpp +++ b/test/reduce/reduce_no_index.cpp @@ -4,7 +4,7 @@ #include #include "ck/library/utility/host_common_util.hpp" -#include "profiler/include/profile_reduce_impl.hpp" +#include "profiler/profile_reduce_impl.hpp" using namespace ck; diff --git a/test/reduce/reduce_with_index.cpp b/test/reduce/reduce_with_index.cpp index c319dca69c..c616a68e74 100644 --- a/test/reduce/reduce_with_index.cpp +++ b/test/reduce/reduce_with_index.cpp @@ -4,7 +4,7 @@ #include #include "ck/library/utility/host_common_util.hpp" -#include "profiler/include/profile_reduce_impl.hpp" +#include "profiler/profile_reduce_impl.hpp" using namespace ck; diff --git a/test/softmax/test_softmax_util.hpp b/test/softmax/test_softmax_util.hpp index 23ac3d20e2..40b300cf99 100644 --- a/test/softmax/test_softmax_util.hpp +++ b/test/softmax/test_softmax_util.hpp @@ -13,7 +13,7 @@ #include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "include/ck/utility/data_type.hpp" -#include "profiler/include/profile_softmax_impl.hpp" +#include "profiler/profile_softmax_impl.hpp" namespace ck {