mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
Improve 4k gemm perf (#1047)
* improve 4k gemm perf
* add f8 instances
* format
---------
Co-authored-by: Jing Zhang <jizha@amd.com>
[ROCm/composable_kernel commit: e8cddfdc3b]
This commit is contained in:
@@ -33,10 +33,13 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
|
||||
printf("Warm up 1 time\n");
|
||||
#endif
|
||||
// warm up
|
||||
-kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
||||
-hip_check_error(hipGetLastError());
|
||||
+for(int i = 0; i < stream_config.cold_niters_; ++i)
|
||||
+{
|
||||
+kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
||||
+hip_check_error(hipGetLastError());
|
||||
+}
|
||||
|
||||
-const int nrepeat = 10;
|
||||
+const int nrepeat = stream_config.nrepeat_;
|
||||
#if DEBUG_LOG
|
||||
printf("Start running %d times...\n", nrepeat);
|
||||
#endif
|
||||
|
||||
@@ -11,4 +11,6 @@ struct StreamConfig
|
||||
hipStream_t stream_id_ = nullptr;
|
||||
bool time_kernel_ = false;
|
||||
int log_level_ = 0;
|
||||
+int cold_niters_ = 50;
|
||||
+int nrepeat_ = 200;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user