Run once disables blocking kernel (#252)

* Measure cold must not use block_kernel for single runs. Per https://github.com/NVIDIA/nvbench/issues/242, we should not use the blocking kernel when --run-once or --profile is used, to avoid possible deadlocks when profiling with external tools and to avoid deadlocking when Python programs load the program on the first execution. * Measure hot should not use the blocking kernel during warmup. This change follows suit with measure_cold and was likewise prompted by a deadlock; see https://github.com/NVIDIA/nvbench/pull/241 * Remove setting of CUDA_MODULE_LOADING=EAGER. This is no longer necessary, as neither the warm-up runs in regular runs nor the single run in run-once/profile mode uses the blocking kernel anymore.
2026-04-20 06:48:53 +00:00 · 2025-07-28 14:14:54 -05:00
parent 3de9dc95da
commit 2ab5e2d1be
3 changed files with 4 additions and 22 deletions
--- a/nvbench/detail/measure_cold.cuh
+++ b/nvbench/detail/measure_cold.cuh
@@ -220,7 +220,10 @@ private:

  void run_trials()
  {
-    kernel_launch_timer timer(*this);
+    // do not use blocking kernel if benchmark is only run once, e.g., when profiling
+    // ref: https://github.com/NVIDIA/nvbench/issues/242
+    const bool disable_blocking_kernel = m_run_once || m_disable_blocking_kernel;
+    kernel_launch_timer timer(*this, disable_blocking_kernel);
    do
    {
      this->launch_kernel(timer);
--- a/nvbench/detail/measure_hot.cuh
+++ b/nvbench/detail/measure_hot.cuh
@@ -106,19 +106,10 @@ private:
  // measurement.
  void run_warmup()
  {
-    if (!m_disable_blocking_kernel)
-    {
-      this->block_stream();
-    }
-
    m_cuda_timer.start(m_launch.get_stream());
    this->launch_kernel();
    m_cuda_timer.stop(m_launch.get_stream());

-    if (!m_disable_blocking_kernel)
-    {
-      this->unblock_stream();
-    }
    this->sync_stream();

    this->check_skip_time(m_cuda_timer.get_duration());
--- a/nvbench/main.cuh
+++ b/nvbench/main.cuh
@@ -172,20 +172,8 @@
 namespace nvbench::detail
 {

-inline void set_env(const char *name, const char *value)
-{
-#ifdef _MSC_VER
-  _putenv_s(name, value);
-#else
-  setenv(name, value, 1);
-#endif
-}
-
 inline void main_initialize(int, char **)
 {
-  // See NVIDIA/NVBench#136 for CUDA_MODULE_LOADING
-  set_env("CUDA_MODULE_LOADING", "EAGER");
-
  // Initialize CUDA driver API if needed:
 #ifdef NVBENCH_HAS_CUPTI
  NVBENCH_DRIVER_API_CALL(cuInit(0));