From f1b9d44304f7aa7d3549321a7526b28acd8cf94d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 2 Feb 2026 11:32:57 -0600 Subject: [PATCH] Support --no-batched CLI option The option sets m_skip_batched boolean member in benchmark_base class. Methods `bool get_skip_batched()` and `void set_skip_batched(bool)` added. m_skip_batched is also added to state class. Similarly named methods are added. CLI help file documents `--no-batched` option. --- docs/cli_help.md | 6 ++++++ nvbench/benchmark_base.cuh | 13 +++++++++++++ nvbench/detail/state_exec.cuh | 2 +- nvbench/option_parser.cu | 17 +++++++++++++++++ nvbench/option_parser.cuh | 1 + nvbench/state.cuh | 9 +++++++++ nvbench/state.cxx | 1 + 7 files changed, 48 insertions(+), 1 deletion(-) diff --git a/docs/cli_help.md b/docs/cli_help.md index 35265a1..8313adb 100644 --- a/docs/cli_help.md +++ b/docs/cli_help.md @@ -115,6 +115,12 @@ * Applies to the most recent `--benchmark`, or all benchmarks if specified before any `--benchmark` arguments. +* `--no-batched` + * Do not run batched measurements even if enabled. + * Intended to shorten run-time when batched measurements are not of interest. + * Applies to the most recent `--benchmark`, or all benchmarks if specified + before any `--benchmark` arguments. + ## Stopping Criteria * `--timeout ` diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index 3eddf2b..0ed901e 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -183,6 +183,18 @@ struct benchmark_base } /// @} + /// If true, the batched measurements for benchmark are not run. This is intended for use to + /// save resources when only non-batched measurements are of interest, although batched + /// measurements are meaningful and code to exercise them is compiled. This option has no + /// effect for CPU only benchmarks and for benchmarks tagged with no_batch tag. 
@{ + [[nodiscard]] bool get_skip_batched() const { return m_skip_batched; } + benchmark_base &set_skip_batched(bool v) + { + m_skip_batched = v; + return *this; + } + /// @} + /// If true, the benchmark does not use the blocking_kernel. This is intended /// for use with external profiling tools. @{ [[nodiscard]] bool get_disable_blocking_kernel() const { return m_disable_blocking_kernel; } @@ -304,6 +316,7 @@ protected: bool m_is_cpu_only{false}; bool m_run_once{false}; bool m_disable_blocking_kernel{false}; + bool m_skip_batched{false}; nvbench::int64_t m_min_samples{10}; diff --git a/nvbench/detail/state_exec.cuh b/nvbench/detail/state_exec.cuh index bf4fb9e..9a65fc2 100644 --- a/nvbench/detail/state_exec.cuh +++ b/nvbench/detail/state_exec.cuh @@ -185,7 +185,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) static_assert(!(tags & no_batch), "Hot measurement doesn't support the `no_batch` exec_tag."); static_assert(!(tags & no_gpu), "Hot measurement doesn't support the `no_gpu` exec_tag."); - if (!this->get_run_once()) + if (!this->get_run_once() && !this->get_skip_batched()) { using measure_t = nvbench::detail::measure_hot; measure_t measure{*this, kernel_launcher}; diff --git a/nvbench/option_parser.cu b/nvbench/option_parser.cu index efddbb4..0957ce5 100644 --- a/nvbench/option_parser.cu +++ b/nvbench/option_parser.cu @@ -467,6 +467,11 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, this->enable_profile(); first += 1; } + else if (arg == "--no-batched") + { + this->disable_batched(); + first += 1; + } else if (arg == "--quiet" || arg == "-q") { // Setting this flag prevents the default stdout printer from being @@ -762,6 +767,18 @@ void option_parser::enable_profile() bench.set_run_once(true); } +void option_parser::disable_batched() +{ + // If no active benchmark, save args as global + if (m_benchmarks.empty()) + { + m_global_benchmark_args.push_back("--no-batched"); + return; + } + benchmark_base &bench = 
*m_benchmarks.back(); + bench.set_skip_batched(true); +} + void option_parser::add_benchmark(const std::string &name) try { diff --git a/nvbench/option_parser.cuh b/nvbench/option_parser.cuh index 110a844..7572ad2 100644 --- a/nvbench/option_parser.cuh +++ b/nvbench/option_parser.cuh @@ -90,6 +90,7 @@ private: void set_stopping_criterion(const std::string &criterion); void enable_profile(); + void disable_batched(); void add_benchmark(const std::string &name); void replay_global_args(); diff --git a/nvbench/state.cuh b/nvbench/state.cuh index 61fd840..326dc41 100644 --- a/nvbench/state.cuh +++ b/nvbench/state.cuh @@ -161,6 +161,14 @@ struct state void set_run_once(bool v) { m_run_once = v; } /// @} + /// If true, the batched measurements of benchmark are not run. This is intended for use to + /// save resources when only non-batched measurements are of interest, although batched + /// measurements are meaningful and code to exercise them is compiled. This option has no + /// effect for CPU only benchmarks and for benchmarks tagged with no_batch tag. @{ + [[nodiscard]] bool get_skip_batched() const { return m_skip_batched; } + void set_skip_batched(bool v) { m_skip_batched = v; } + /// @} + /// If true, the benchmark does not use the blocking_kernel. This is intended /// for use with external profiling tools. 
@{ [[nodiscard]] bool get_disable_blocking_kernel() const { return m_disable_blocking_kernel; } @@ -306,6 +314,7 @@ private: bool m_is_cpu_only{false}; bool m_run_once{false}; bool m_disable_blocking_kernel{false}; + bool m_skip_batched{false}; nvbench::criterion_params m_criterion_params; std::string m_stopping_criterion; diff --git a/nvbench/state.cxx b/nvbench/state.cxx index 9d52667..2ae3c11 100644 --- a/nvbench/state.cxx +++ b/nvbench/state.cxx @@ -57,6 +57,7 @@ state::state(const benchmark_base &bench, , m_is_cpu_only(bench.get_is_cpu_only()) , m_run_once{bench.get_run_once()} , m_disable_blocking_kernel{bench.get_disable_blocking_kernel()} + , m_skip_batched{bench.get_skip_batched()} , m_criterion_params{bench.get_criterion_params()} , m_stopping_criterion(bench.get_stopping_criterion()) , m_min_samples{bench.get_min_samples()}