Make relevant profilers print the number of valid instances to aid testing.

This commit is contained in:
kiefer
2025-08-20 10:48:41 +00:00
parent 43f99d85bd
commit 4354cefbca
2 changed files with 9 additions and 0 deletions

View File

@@ -192,6 +192,7 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
float best_avg_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int valids = 0;
// profile device op instances
bool pass = true;
@@ -207,6 +208,8 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
// re-init output to zero before profiling next kernel
out_device_buf.SetZero();
valids++;
std::string op_name = op_ptr->GetTypeString();
auto invoker_ptr = op_ptr->MakeInvokerPointer();
@@ -312,6 +315,8 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
run_impl(op_ptr, argument_ptr);
}
printf("\033[36mvalids: %d\n\033[0m", valids);
std::cout << "Best configuration parameters:" << "\nname: " << best_op_name
<< "\navg_time: " << best_avg_time << "\ntflops: " << best_tflops
<< "\nGB/s: " << best_gb_per_sec << std::endl;

View File

@@ -144,6 +144,7 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
float best_avg_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int valids = 0;
// profile device op instances
bool pass = true;
@@ -157,6 +158,7 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
std::string op_name = op_ptr->GetTypeString();
valids++;
auto invoker_ptr = op_ptr->MakeInvokerPointer();
@@ -250,6 +252,8 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
run_impl(op_ptr, argument_ptr);
}
printf("\033[36mvalids: %d\033[0m\n", valids);
std::cout << "Best configuration parameters:" << "\nname: " << best_op_name
<< "\navg_time: " << best_avg_time << "\ntflops: " << best_tflops
<< "\nGB/s: " << best_gb_per_sec << std::endl;