mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
addressing review comments
This commit is contained in:
@@ -15,12 +15,6 @@
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
// Processor-count thresholds and latency tuning constants.
// NOTE(review): judging only by the names, the *_CU_PROCESSORS values look like compute-unit
// counts and the OPTIMAL_LATENCY_* values look like latencies in an unspecified time unit;
// their consumers are not visible here, so confirm against the code that uses them.
// Being preprocessor macros, these names are not scoped by the enclosing namespace.
#define LOW_CU_PROCESSORS 80
|
||||
#define HIGH_CU_PROCESSORS 228
|
||||
#define OPTIMAL_LATENCY_LOW_CU_PROCESSORS 0.005
|
||||
#define OPTIMAL_LATENCY_HIGH_CU_PROCESSORS 0.0015
|
||||
#define OPTIMAL_LATENCY_SAFE_MARGIN 0.01
|
||||
|
||||
template <int MaxThreadPerBlock, int MinBlockPerCu, typename Kernel, typename... Args>
|
||||
#if CK_TILE_USE_LAUNCH_BOUNDS
|
||||
__launch_bounds__(MaxThreadPerBlock, MinBlockPerCu)
|
||||
@@ -83,12 +77,6 @@ preprocess_profiling_impl(TimerType timer, const stream_config& s, PreprocessFun
|
||||
return timer.duration() / s.nrepeat_;
|
||||
}
|
||||
|
||||
// Convenience wrapper around preprocess_profiling_impl that supplies a default-constructed
// gpu_timer as the timer.
//
// s          - stream configuration, forwarded unchanged to the implementation.
// preprocess - callable forwarded unchanged to the implementation.
//
// Returns whatever preprocess_profiling_impl returns, converted to float.
// NOTE(review): presumably this is the average preprocess time per repeat; confirm the
// unit against gpu_timer::duration().
template <typename PreprocessFunc>
|
||||
CK_TILE_HOST float preprocess_profiling(const stream_config& s, PreprocessFunc preprocess)
|
||||
{
|
||||
return preprocess_profiling_impl(gpu_timer{}, s, preprocess);
|
||||
}
|
||||
|
||||
template <typename TimerType, typename CallablesFunc, typename PreprocessFunc = std::nullptr_t>
|
||||
CK_TILE_HOST double timing_loop_impl(TimerType timer,
|
||||
const stream_config& s,
|
||||
|
||||
@@ -20,6 +20,10 @@ namespace ck_tile {
|
||||
*
|
||||
* // create stream config with _some_stream_id_, and benchmark using cpu timer
|
||||
* stream_config s = stream_config{_some_stream_id_, true, 0, 3, 10, false};
|
||||
*
|
||||
* // create stream config with _some_stream_id_, and enable gpu timer for rotating buffer with
|
||||
* // rotating buffer count
|
||||
* stream_config s = stream_config{_some_stream_id_, true, 0, 3, 10, true, true, 1};
|
||||
**/
|
||||
|
||||
struct stream_config
|
||||
|
||||
Reference in New Issue
Block a user