From 4354cefbcaedba91ebc36dc963b09eca477f6cb7 Mon Sep 17 00:00:00 2001
From: kiefer
Date: Wed, 20 Aug 2025 10:48:41 +0000
Subject: [PATCH] Make relevant profilers print the number of valid instances to aid testing.

Both grouped conv fwd profilers get an `int valids` counter that is
incremented once per profiled instance and printed (wrapped in the
ANSI SGR 36 colour sequence) right before the "Best configuration
parameters" summary.

In profile_grouped_conv_fwd_impl the increment sits inside the
IsSupportedArgument() branch; in profile_grouped_conv_fwd_bias_clamp_impl
it sits right after the output buffer is re-zeroed for the next kernel.

The colour reset is emitted before the trailing newline in both
profilers so the escape sequence does not end up at the start of the
following output line, which keeps the output easy to match line by line.

---
 .../profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp | 5 +++++
 profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/profiler/include/profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp b/profiler/include/profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp
index d0e1cf2611..ec9a0c989e 100644
--- a/profiler/include/profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp
+++ b/profiler/include/profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp
@@ -192,6 +192,7 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
     float best_avg_time = 0;
     float best_tflops = 0;
     float best_gb_per_sec = 0;
+    int valids = 0;
 
     // profile device op instances
     bool pass = true;
@@ -207,6 +208,8 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
             // re-init output to zero before profiling next kernel
             out_device_buf.SetZero();
 
+            valids++;
+
             std::string op_name = op_ptr->GetTypeString();
 
             auto invoker_ptr = op_ptr->MakeInvokerPointer();
@@ -312,6 +315,8 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
         run_impl(op_ptr, argument_ptr);
     }
 
+    printf("\033[36mvalids: %d\033[0m\n", valids);
+
     std::cout << "Best configuration parameters:"
               << "\nname: " << best_op_name << "\navg_time: " << best_avg_time
               << "\ntflops: " << best_tflops << "\nGB/s: " << best_gb_per_sec << std::endl;
diff --git a/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp b/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
index 2dcee4c1fc..2d507aab18 100644
--- a/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
+++ b/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
@@ -144,6 +144,7 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
     float best_avg_time = 0;
     float best_tflops = 0;
     float best_gb_per_sec = 0;
+    int valids = 0;
 
     // profile device op instances
     bool pass = true;
@@ -157,6 +158,7 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
         if(op_ptr->IsSupportedArgument(argument_ptr.get()))
         {
             std::string op_name = op_ptr->GetTypeString();
+            valids++;
 
             auto invoker_ptr = op_ptr->MakeInvokerPointer();
 
@@ -250,6 +252,8 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
         run_impl(op_ptr, argument_ptr);
     }
 
+    printf("\033[36mvalids: %d\033[0m\n", valids);
+
     std::cout << "Best configuration parameters:"
               << "\nname: " << best_op_name << "\navg_time: " << best_avg_time
               << "\ntflops: " << best_tflops << "\nGB/s: " << best_gb_per_sec << std::endl;