mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
Improve 4k gemm perf (#1047)
* improve 4k gemm perf
* add f8 instances
* format

---------

Co-authored-by: Jing Zhang <jizha@amd.com>
This commit is contained in:
@@ -33,10 +33,13 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
|
||||
printf("Warm up 1 time\n");
|
||||
#endif
|
||||
// warm up
|
||||
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
||||
hip_check_error(hipGetLastError());
|
||||
for(int i = 0; i < stream_config.cold_niters_; ++i)
|
||||
{
|
||||
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
||||
hip_check_error(hipGetLastError());
|
||||
}
|
||||
|
||||
const int nrepeat = 10;
|
||||
const int nrepeat = stream_config.nrepeat_;
|
||||
#if DEBUG_LOG
|
||||
printf("Start running %d times...\n", nrepeat);
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user