addressing review comments

This commit is contained in:
khuagarw
2025-07-31 20:28:46 +00:00
parent 140a812264
commit 29c45192ba
2 changed files with 4 additions and 12 deletions

View File

@@ -15,12 +15,6 @@
namespace ck_tile {
// Tuning constants keyed on device size.
// NOTE(review): the code that consumes these macros and the units of the
// latency values are not visible in this view — confirm before changing them.

// Presumably compute-unit (CU) counts for a "low CU" and a "high CU" device — TODO confirm.
#define LOW_CU_PROCESSORS 80
#define HIGH_CU_PROCESSORS 228
// Presumably latency targets paired with the two CU counts above — TODO confirm units.
#define OPTIMAL_LATENCY_LOW_CU_PROCESSORS 0.005
#define OPTIMAL_LATENCY_HIGH_CU_PROCESSORS 0.0015
// Presumably a safety margin used together with the latency targets — TODO confirm usage.
#define OPTIMAL_LATENCY_SAFE_MARGIN 0.01
template <int MaxThreadPerBlock, int MinBlockPerCu, typename Kernel, typename... Args>
#if CK_TILE_USE_LAUNCH_BOUNDS
__launch_bounds__(MaxThreadPerBlock, MinBlockPerCu)
@@ -83,12 +77,6 @@ preprocess_profiling_impl(TimerType timer, const stream_config& s, PreprocessFun
return timer.duration() / s.nrepeat_;
}
// Convenience wrapper around preprocess_profiling_impl that always times with
// a default-constructed gpu_timer.
//
// s          - stream configuration, forwarded unchanged to the impl.
// preprocess - callable to be profiled, forwarded unchanged to the impl.
//
// Returns whatever preprocess_profiling_impl returns, converted to float.
template <typename PreprocessFunc>
CK_TILE_HOST float preprocess_profiling(const stream_config& s, PreprocessFunc preprocess)
{
// The timer is passed as a temporary so that it is constructed directly into
// the impl's by-value parameter rather than copied from a named local.
return preprocess_profiling_impl(gpu_timer{}, s, preprocess);
}
template <typename TimerType, typename CallablesFunc, typename PreprocessFunc = std::nullptr_t>
CK_TILE_HOST double timing_loop_impl(TimerType timer,
const stream_config& s,

View File

@@ -20,6 +20,10 @@ namespace ck_tile {
*
* // create stream config with _some_stream_id_, and benchmark using cpu timer
* stream_config s = stream_config{_some_stream_id_, true, 0, 3, 10, false};
*
* // create stream config with _some_stream_id_, and benchmark using gpu timer with the
* // rotating buffer enabled and a rotating buffer count of 1
* stream_config s = stream_config{_some_stream_id_, true, 0, 3, 10, true, true, 1};
**/
struct stream_config