From b8c664d22e6f67ba11c6095606bd8e89e33e0e4a Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com>
Date: Thu, 3 Jul 2025 23:22:12 -0500
Subject: [PATCH] Do not use blocking kernel in warmup run of measure_cold (#241)

See https://github.com/NVIDIA/nvbench/issues/240
---
 nvbench/detail/measure_cold.cuh | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/nvbench/detail/measure_cold.cuh b/nvbench/detail/measure_cold.cuh
index e3b329d..d7b8a5d 100644
--- a/nvbench/detail/measure_cold.cuh
+++ b/nvbench/detail/measure_cold.cuh
@@ -135,6 +135,11 @@ struct measure_cold_base::kernel_launch_timer
       , m_disable_blocking_kernel{measure.m_disable_blocking_kernel}
   {}
 
+  explicit kernel_launch_timer(measure_cold_base &measure, bool disable_blocking_kernel)
+      : m_measure{measure}
+      , m_disable_blocking_kernel{disable_blocking_kernel}
+  {}
+
   __forceinline__ void start()
   {
     m_measure.flush_device_l2();
@@ -206,7 +211,10 @@ private:
       return;
     }
 
-    kernel_launch_timer timer(*this);
+    // disable use of blocking kernel for warm-up run
+    // see https://github.com/NVIDIA/nvbench/issues/240
+    constexpr bool disable_blocking_kernel = true;
+    kernel_launch_timer timer(*this, disable_blocking_kernel);
     this->launch_kernel(timer);
     this->check_skip_time(m_cuda_timer.get_duration());