Merge pull request #309 from oleksandr-pavlyk/support-skipping-batched-runs

2026-05-12 01:10:01 +00:00 · 2026-02-02 17:57:45 -06:00
parent 34a089f805 a33a454a2d
commit 8a128ed7d9
7 changed files with 50 additions and 1 deletions
--- a/docs/cli_help.md
+++ b/docs/cli_help.md
@@ -115,6 +115,12 @@
  * Applies to the most recent `--benchmark`, or all benchmarks if specified
    before any `--benchmark` arguments.

+* `--no-batch`
+  * Do not run batched measurements even if enabled.
+  * Intended to shorten run-time when batched measurements are not of interest.
+  * Applies to the most recent `--benchmark`, or all benchmarks if specified
+    before any `--benchmark` arguments.
+
 ## Stopping Criteria

 * `--timeout <seconds>`
--- a/nvbench/benchmark_base.cuh
+++ b/nvbench/benchmark_base.cuh
@@ -183,6 +183,18 @@ struct benchmark_base
  }
  /// @}

+  /// If true, the batched measurements for this benchmark are not run. This is intended to
+  /// save resources when only non-batched measurements are of interest, even though batched
+  /// measurements are meaningful and the code to exercise them is compiled. This option has no
+  /// effect on CPU-only benchmarks or on benchmarks tagged with the `no_batch` exec_tag. @{
+  [[nodiscard]] bool get_skip_batched() const { return m_skip_batched; }
+  benchmark_base &set_skip_batched(bool v)
+  {
+    m_skip_batched = v;
+    return *this;
+  }
+  /// @}
+
  /// If true, the benchmark does not use the blocking_kernel. This is intended
  /// for use with external profiling tools. @{
  [[nodiscard]] bool get_disable_blocking_kernel() const { return m_disable_blocking_kernel; }
@@ -304,6 +316,7 @@ protected:
  bool m_is_cpu_only{false};
  bool m_run_once{false};
  bool m_disable_blocking_kernel{false};
+  bool m_skip_batched{false};

  nvbench::int64_t m_min_samples{10};

--- a/nvbench/detail/state_exec.cuh
+++ b/nvbench/detail/state_exec.cuh
@@ -185,7 +185,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
      static_assert(!(tags & no_batch), "Hot measurement doesn't support the `no_batch` exec_tag.");
      static_assert(!(tags & no_gpu), "Hot measurement doesn't support the `no_gpu` exec_tag.");

-      if (!this->get_run_once())
+      if (!this->skip_hot_measurement())
      {
        using measure_t = nvbench::detail::measure_hot<KL>;
        measure_t measure{*this, kernel_launcher};
--- a/nvbench/option_parser.cu
+++ b/nvbench/option_parser.cu
@@ -467,6 +467,11 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
      this->enable_profile();
      first += 1;
    }
+    else if (arg == "--no-batch")
+    {
+      this->disable_batched();
+      first += 1;
+    }
    else if (arg == "--quiet" || arg == "-q")
    {
      // Setting this flag prevents the default stdout printer from being
@@ -762,6 +767,18 @@ void option_parser::enable_profile()
  bench.set_run_once(true);
 }

+void option_parser::disable_batched()
+{
+  // No benchmark has been added yet: store the flag with the global benchmark args instead.
+  if (m_benchmarks.empty())
+  {
+    m_global_benchmark_args.push_back("--no-batch");
+    return;
+  }
+  benchmark_base &bench = *m_benchmarks.back();
+  bench.set_skip_batched(true);
+}
+
 void option_parser::add_benchmark(const std::string &name)
 try
 {
--- a/nvbench/option_parser.cuh
+++ b/nvbench/option_parser.cuh
@@ -90,6 +90,7 @@ private:
  void set_stopping_criterion(const std::string &criterion);

  void enable_profile();
+  void disable_batched();

  void add_benchmark(const std::string &name);
  void replay_global_args();
--- a/nvbench/state.cuh
+++ b/nvbench/state.cuh
@@ -161,6 +161,14 @@ struct state
  void set_run_once(bool v) { m_run_once = v; }
  /// @}

+  /// If true, the batched measurements of this benchmark are not run. This is intended to
+  /// save resources when only non-batched measurements are of interest, even though batched
+  /// measurements are meaningful and the code to exercise them is compiled. This option has no
+  /// effect on CPU-only benchmarks or on benchmarks tagged with the `no_batch` exec_tag. @{
+  [[nodiscard]] bool get_skip_batched() const { return m_skip_batched; }
+  void set_skip_batched(bool v) { m_skip_batched = v; }
+  /// @}
+
  /// If true, the benchmark does not use the blocking_kernel. This is intended
  /// for use with external profiling tools. @{
  [[nodiscard]] bool get_disable_blocking_kernel() const { return m_disable_blocking_kernel; }
@@ -298,6 +306,8 @@ private:
        std::optional<nvbench::device_info> device,
        std::size_t type_config_index);

+  [[nodiscard]] bool skip_hot_measurement() const { return get_run_once() || get_skip_batched(); }
+
  std::reference_wrapper<const nvbench::benchmark_base> m_benchmark;
  nvbench::named_values m_axis_values;
  std::optional<nvbench::device_info> m_device;
@@ -306,6 +316,7 @@ private:
  bool m_is_cpu_only{false};
  bool m_run_once{false};
  bool m_disable_blocking_kernel{false};
+  bool m_skip_batched{false};

  nvbench::criterion_params m_criterion_params;
  std::string m_stopping_criterion;
--- a/nvbench/state.cxx
+++ b/nvbench/state.cxx
@@ -57,6 +57,7 @@ state::state(const benchmark_base &bench,
    , m_is_cpu_only(bench.get_is_cpu_only())
    , m_run_once{bench.get_run_once()}
    , m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
+    , m_skip_batched{bench.get_skip_batched()}
    , m_criterion_params{bench.get_criterion_params()}
    , m_stopping_criterion(bench.get_stopping_criterion())
    , m_min_samples{bench.get_min_samples()}