mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 09:45:56 +00:00
Enhance printing functionality (#1751)
* Added object print with all template parameters
* fix clang format

---------

Co-authored-by: ravil-mobile <ravil.aviva.com@gmail.com>
Co-authored-by: illsilin <Illia.Silin@amd.com>
This commit is contained in:
@@ -144,6 +144,7 @@ bool profile_gemm_universal_impl(int do_verification,
|
||||
}
|
||||
|
||||
std::string best_op_name;
|
||||
std::optional<std::string> best_op_object_name;
|
||||
float best_ave_time = 0;
|
||||
float best_tflops = 0;
|
||||
float best_gb_per_sec = 0;
|
||||
@@ -225,7 +226,8 @@ bool profile_gemm_universal_impl(int do_verification,
|
||||
}
|
||||
}
|
||||
|
||||
std::string op_name = op_ptr->GetTypeString();
|
||||
std::string op_name = op_ptr->GetTypeString();
|
||||
std::optional<std::string> op_obj_name = op_ptr->GetObjectName();
|
||||
|
||||
float ave_time = invoker_ptr->Run(argument_ptr.get(),
|
||||
StreamConfig{nullptr,
|
||||
@@ -251,11 +253,12 @@ bool profile_gemm_universal_impl(int do_verification,
|
||||
|
||||
if(tflops > best_tflops && ave_time > 1e-10)
|
||||
{
|
||||
best_op_name = op_name;
|
||||
best_tflops = tflops;
|
||||
best_ave_time = ave_time;
|
||||
best_gb_per_sec = gb_per_sec;
|
||||
best_kbatch = kbatch_curr;
|
||||
best_op_name = op_name;
|
||||
best_op_object_name = op_obj_name;
|
||||
best_tflops = tflops;
|
||||
best_ave_time = ave_time;
|
||||
best_gb_per_sec = gb_per_sec;
|
||||
best_kbatch = kbatch_curr;
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -306,6 +309,9 @@ bool profile_gemm_universal_impl(int do_verification,
|
||||
<< " : " << best_ave_time << " ms, " << best_tflops << " TFlops, " << best_gb_per_sec
|
||||
<< " GB/s, " << best_op_name << std::endl;
|
||||
|
||||
if(best_op_object_name)
|
||||
std::cout << best_op_object_name.value() << std::endl;
|
||||
|
||||
return pass;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user