Update to gpu_timer for rotating_buffer (#2524)

* update gpu_timer for rotating buffer to match hipblasLt's implementation

* timing fix

* Updating gpu timer for old ck as well

* Revert "Updating gpu timer for old ck as well"

This reverts commit 958cd1bc99.

* code clean up with runtime argument; function rename

* code cleanup

* general timer fixes

* bug fix

* clang formatted

* addressing review comments

* clang formatted

* Addressing review comments

* CI fix

---------

Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>

[ROCm/composable_kernel commit: 61e21f5567]
This commit is contained in:
Khushbu Agarwal
2025-07-29 15:21:05 -07:00
committed by GitHub
parent 96c08decf7
commit 3bc1bdff9a
13 changed files with 182 additions and 78 deletions

View File

@@ -34,7 +34,8 @@ void benchmark_gemm(const ck_tile::ArgParser& arg_parser)
arg_parser.get_bool("log"),
arg_parser.get_str("csv_filename"),
arg_parser.get_bool("flush_cache"),
arg_parser.get_int("rotating_count")};
arg_parser.get_int("rotating_count"),
arg_parser.get_int("bench_time")};
auto& profiler = GemmProfiler::instance(setting);

View File

@@ -125,6 +125,7 @@ struct Setting
std::string csv_filename_;
bool flush_cache_;
int rotating_count_;
int bench_time_ms_;
};
inline std::string get_rocm_version()

View File

@@ -110,6 +110,7 @@ inline auto create_args(int argc, char* argv[])
"To flush cache, possible values are true or false. "
"Default is false.")
.insert("rotating_count", "5", "number of iterations to rotate the cache. default is 5.")
.insert("bench_time", "0", "benchmark time in ms. default is 0 ms.")
.insert("metric",
"0",
"Metric with which to measure kernel performance. Set to 0 for latency, 1 for "

View File

@@ -348,7 +348,7 @@ struct GemmKernel {{
hipGetErrorString(hipMemsetAsync(
args.e_ptr, 0, args.M * args.N * sizeof(CDataType), stream.stream_id_));
}};
ave_time = ck_tile::launch_kernel_preprocess(
ave_time = ck_tile::launch_kernel_time_mask(
stream,
run_flush_cache,
ck_tile::make_kernel<blocks.x, kBlockPerCu>(

View File

@@ -131,7 +131,8 @@ class GemmProfiler
setting_.n_repeat_,
setting_.is_gpu_timer_,
setting_.flush_cache_,
setting_.rotating_count_});
setting_.rotating_count_,
setting_.bench_time_ms_});
process_result(gemm_problem,
c_m_n_dev_buf,
c_m_n_host_result,