mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-20 04:49:54 +00:00
Update to gpu_timer for rotating_buffer (#2524)
* update gpu_timer for rotating buffer as hipblasLt's implementation * timing fix * Updating gpu timer for old ck as well * Revert "Updating gpu timer for old ck as well" This reverts commit 958cd1bc99. * code clean up with runtime argument; function rename * code cleanup * general timer fixes * bug fix * clang formatted * addressing review comments * clang formatted * Addressing review comments * CI fix --------- Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com> [ROCm/composable_kernel commit:61e21f5567]
This commit is contained in:
@@ -34,7 +34,8 @@ void benchmark_gemm(const ck_tile::ArgParser& arg_parser)
|
||||
arg_parser.get_bool("log"),
|
||||
arg_parser.get_str("csv_filename"),
|
||||
arg_parser.get_bool("flush_cache"),
|
||||
arg_parser.get_int("rotating_count")};
|
||||
arg_parser.get_int("rotating_count"),
|
||||
arg_parser.get_int("bench_time")};
|
||||
|
||||
auto& profiler = GemmProfiler::instance(setting);
|
||||
|
||||
|
||||
@@ -125,6 +125,7 @@ struct Setting
|
||||
std::string csv_filename_;
|
||||
bool flush_cache_;
|
||||
int rotating_count_;
|
||||
int bench_time_ms_;
|
||||
};
|
||||
|
||||
inline std::string get_rocm_version()
|
||||
|
||||
@@ -110,6 +110,7 @@ inline auto create_args(int argc, char* argv[])
|
||||
"To flush cache, possible values are true or false. "
|
||||
"Default is false.")
|
||||
.insert("rotating_count", "5", "number of iterations to rotate the cache. default is 5.")
|
||||
.insert("bench_time", "0", "benchmark time in ms. default is 0 ms.")
|
||||
.insert("metric",
|
||||
"0",
|
||||
"Metric with which to measure kernel performance. Set to 0 for latency, 1 for "
|
||||
|
||||
@@ -348,7 +348,7 @@ struct GemmKernel {{
|
||||
hipGetErrorString(hipMemsetAsync(
|
||||
args.e_ptr, 0, args.M * args.N * sizeof(CDataType), stream.stream_id_));
|
||||
}};
|
||||
ave_time = ck_tile::launch_kernel_preprocess(
|
||||
ave_time = ck_tile::launch_kernel_time_mask(
|
||||
stream,
|
||||
run_flush_cache,
|
||||
ck_tile::make_kernel<blocks.x, kBlockPerCu>(
|
||||
|
||||
@@ -131,7 +131,8 @@ class GemmProfiler
|
||||
setting_.n_repeat_,
|
||||
setting_.is_gpu_timer_,
|
||||
setting_.flush_cache_,
|
||||
setting_.rotating_count_});
|
||||
setting_.rotating_count_,
|
||||
setting_.bench_time_ms_});
|
||||
process_result(gemm_problem,
|
||||
c_m_n_dev_buf,
|
||||
c_m_n_host_result,
|
||||
|
||||
Reference in New Issue
Block a user