From 4b1b6eff8fd7495ad3e77b1a3c17b459dd33d672 Mon Sep 17 00:00:00 2001 From: Adam Osewski <19374865+aosewski@users.noreply.github.com> Date: Tue, 17 Dec 2024 09:19:44 +0100 Subject: [PATCH] Enhance printing functionality (#1751) * Added object print with all template parameters * fix clang format --------- Co-authored-by: ravil-mobile Co-authored-by: illsilin [ROCm/composable_kernel commit: d46196f291a33539a089d7d09bcbc4d2270733c2] --- .../gpu/device/device_base.hpp | 34 + .../impl/device_gemm_xdl_cshuffle_v3.hpp | 1 + ...m_sn_uk_gfx9_32x128x512_1x4x1_16x16x16.inc | 1383 +++++++++------- ..._uk_gfx9_32x128x512_1x4x1_16x16x16_itl.inc | 1439 +++++++++-------- ...atmm_uk_gfx9_32x512x128_1x1x1_16x16x16.inc | 1007 ++++++------ .../profiler/profile_gemm_universal_impl.hpp | 18 +- 6 files changed, 2095 insertions(+), 1787 deletions(-) diff --git a/include/ck/tensor_operation/gpu/device/device_base.hpp b/include/ck/tensor_operation/gpu/device/device_base.hpp index 908ada016d..736e241fdf 100644 --- a/include/ck/tensor_operation/gpu/device/device_base.hpp +++ b/include/ck/tensor_operation/gpu/device/device_base.hpp @@ -5,6 +5,8 @@ #include #include +#include +#include #include "ck/stream_config.hpp" @@ -12,6 +14,34 @@ namespace ck { namespace tensor_operation { namespace device { +#define GET_OBJECT_NAME_IMLP \ + std::optional GetObjectName() const override \ + { \ + std::string str = __PRETTY_FUNCTION__; \ + static std::regex obj_name_expr{" (.*)::GetObjectName"}; \ + std::smatch match; \ + if(!std::regex_search(str, match, obj_name_expr)) \ + { \ + return str; \ + } \ + return std::string(match[1]) + ';'; \ + } + +#define GET_TEMPLATE_INFO_IMPL \ + std::optional GetTemplateInfo() const override \ + { \ + std::string str = __PRETTY_FUNCTION__; \ + static std::regex template_expr{"\\[(.*)\\]"}; \ + std::smatch match; \ + if(!std::regex_search(str, match, template_expr)) \ + { \ + return std::nullopt; \ + } \ + return std::string(match[1]); \ + } + +#define REGISTER_EXTRA_PRINTING_METHODS GET_OBJECT_NAME_IMLP GET_TEMPLATE_INFO_IMPL + struct BaseArgument { BaseArgument() = default; @@ -48,6 +78,10 @@ struct BaseOperator virtual std::string GetTypeIdName() const { return typeid(*this).name(); } + virtual std::optional GetObjectName() const { return std::nullopt; } + + virtual std::optional GetTemplateInfo() const { return std::nullopt; } + virtual std::string GetTypeIdHashCode() const { std::ostringstream oss; diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_v3.hpp index 4489b2e5ce..ad6aa1e7c3 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_v3.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_v3.hpp @@ -729,6 +729,7 @@ struct DeviceGemm_Xdl_CShuffleV3 : public DeviceGemmV2 best_op_object_name; float best_ave_time = 0; float best_tflops = 0; float best_gb_per_sec = 0; @@ -225,7 +226,8 @@ bool profile_gemm_universal_impl(int do_verification, } } - std::string op_name = op_ptr->GetTypeString(); + std::string op_name = op_ptr->GetTypeString(); + std::optional op_obj_name = op_ptr->GetObjectName(); float ave_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, @@ -251,11 +253,12 @@ bool profile_gemm_universal_impl(int do_verification, if(tflops > best_tflops && ave_time > 1e-10) { - best_op_name = op_name; - best_tflops = tflops; - best_ave_time = ave_time; - best_gb_per_sec = gb_per_sec; - best_kbatch = kbatch_curr; + best_op_name = op_name; + best_op_object_name = op_obj_name; + best_tflops = tflops; + best_ave_time = ave_time; + best_gb_per_sec = gb_per_sec; + best_kbatch = kbatch_curr; } } else @@ -306,6 +309,9 @@ bool profile_gemm_universal_impl(int do_verification, << " : " << best_ave_time << " ms, " << best_tflops << " TFlops, " << best_gb_per_sec << " GB/s, " << best_op_name << std::endl; + if(best_op_object_name) + std::cout << best_op_object_name.value() << std::endl; + return pass; }