mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Add --run-once option.
Fixes #10. Adds a mode that forces a benchmark to only run once, simplifying profiling use cases. This can be enabled by any of the following methods: * Passing `--run-once` on the command line * `NVBENCH_CREATE(...).set_run_once(true)` when declaring a benchmark * `state.set_run_once(true)` from within the benchmark implementation.
This commit is contained in:
@@ -94,3 +94,10 @@
|
||||
noise).
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--run-once`
|
||||
* Only run the benchmark once, skipping any warmup runs and batched
|
||||
measurements.
|
||||
* Intended for use with external profiling tools.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
@@ -177,6 +177,17 @@ struct benchmark_base
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// If true, the benchmark is only run once, skipping all warmup runs and only
|
||||
/// executing a single non-batched measurement. This is intended for use with
|
||||
/// external profiling tools. @{
|
||||
[[nodiscard]] bool get_run_once() const { return m_run_once; }
|
||||
benchmark_base &set_run_once(bool v)
|
||||
{
|
||||
m_run_once = v;
|
||||
return *this;
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// Accumulate at least this many seconds of timing data per measurement. @{
|
||||
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
|
||||
benchmark_base &set_min_time(nvbench::float64_t min_time)
|
||||
@@ -239,6 +250,8 @@ protected:
|
||||
|
||||
optional_ref<nvbench::printer_base> m_printer;
|
||||
|
||||
bool m_run_once{false};
|
||||
|
||||
nvbench::int64_t m_min_samples{10};
|
||||
nvbench::float64_t m_min_time{0.5};
|
||||
nvbench::float64_t m_max_noise{0.005}; // 0.5% relative standard deviation
|
||||
|
||||
@@ -36,6 +36,7 @@ namespace nvbench::detail
|
||||
|
||||
measure_cold_base::measure_cold_base(state &exec_state)
|
||||
: m_state{exec_state}
|
||||
, m_run_once{exec_state.get_run_once()}
|
||||
, m_min_samples{exec_state.get_min_samples()}
|
||||
, m_max_noise{exec_state.get_max_noise()}
|
||||
, m_min_time{exec_state.get_min_time()}
|
||||
|
||||
@@ -98,6 +98,8 @@ protected:
|
||||
nvbench::detail::l2flush m_l2flush;
|
||||
nvbench::blocking_kernel m_blocker;
|
||||
|
||||
bool m_run_once{false};
|
||||
|
||||
nvbench::int64_t m_min_samples{};
|
||||
nvbench::float64_t m_max_noise{}; // rel stdev
|
||||
nvbench::float64_t m_min_time{};
|
||||
@@ -177,6 +179,11 @@ private:
|
||||
// measurement.
|
||||
void run_warmup()
|
||||
{
|
||||
if (m_run_once)
|
||||
{ // Skip warmups
|
||||
return;
|
||||
}
|
||||
|
||||
kernel_launch_timer<use_blocking_kernel> timer(*this);
|
||||
this->launch_kernel(timer);
|
||||
this->check_skip_time(m_cuda_timer.get_duration());
|
||||
@@ -206,6 +213,11 @@ private:
|
||||
m_timeout_timer.stop();
|
||||
const auto total_time = m_timeout_timer.get_duration();
|
||||
|
||||
if (m_run_once)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (m_total_cuda_time > m_min_time && // Min time okay
|
||||
m_total_samples > m_min_samples && // Min samples okay
|
||||
m_cuda_noise < m_max_noise) // Noise okay
|
||||
|
||||
@@ -44,11 +44,20 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
|
||||
"`ExecTags` argument must be a member (or combination of "
|
||||
"members) from nvbench::exec_tag.");
|
||||
|
||||
// If no measurements selected, pick some defaults based on the modifiers:
|
||||
constexpr auto measure_tags = tags & measure_mask;
|
||||
constexpr auto modifier_tags = tags & modifier_mask;
|
||||
|
||||
// "run once" is handled by the cold measurement:
|
||||
if (!(modifier_tags & run_once) && this->get_run_once())
|
||||
{
|
||||
constexpr auto run_once_tags = modifier_tags | cold | run_once;
|
||||
this->exec(run_once_tags, std::forward<KernelLauncher>(kernel_launcher));
|
||||
return;
|
||||
}
|
||||
|
||||
// If no measurements selected, pick some defaults based on the modifiers:
|
||||
if constexpr (!measure_tags)
|
||||
{
|
||||
constexpr auto modifier_tags = tags & modifier_mask;
|
||||
if constexpr (modifier_tags & (timer | sync))
|
||||
{ // Can't do hot timings with manual timer or sync; whole point is to not
|
||||
// sync in between executions.
|
||||
|
||||
@@ -34,7 +34,8 @@ enum class exec_flag
|
||||
timer = 0x01, // KernelLauncher uses manual timing
|
||||
no_block = 0x02, // Disables use of `blocking_kernel`.
|
||||
sync = 0x04, // KernelLauncher has indicated that it will sync
|
||||
modifier_mask = timer | no_block | sync,
|
||||
run_once = 0x08, // Only run the benchmark once (for profiling).
|
||||
modifier_mask = timer | no_block | sync | run_once,
|
||||
|
||||
// Measurement types:
|
||||
cold = 0x0100, // measure_cold
|
||||
@@ -93,6 +94,7 @@ using none_t = tag<nvbench::detail::exec_flag::none>;
|
||||
using timer_t = tag<nvbench::detail::exec_flag::timer>;
|
||||
using no_block_t = tag<nvbench::detail::exec_flag::no_block>;
|
||||
using sync_t = tag<nvbench::detail::exec_flag::sync>;
|
||||
using run_once_t = tag<nvbench::detail::exec_flag::run_once>;
|
||||
using hot_t = tag<nvbench::detail::exec_flag::hot>;
|
||||
using cold_t = tag<nvbench::detail::exec_flag::cold>;
|
||||
using modifier_mask_t = tag<nvbench::detail::exec_flag::modifier_mask>;
|
||||
@@ -102,6 +104,7 @@ constexpr inline none_t none;
|
||||
constexpr inline timer_t timer;
|
||||
constexpr inline no_block_t no_block;
|
||||
constexpr inline sync_t sync;
|
||||
constexpr inline run_once_t run_once;
|
||||
constexpr inline cold_t cold;
|
||||
constexpr inline hot_t hot;
|
||||
constexpr inline modifier_mask_t modifier_mask;
|
||||
|
||||
@@ -88,6 +88,8 @@ private:
|
||||
void print_help() const;
|
||||
void print_help_axis() const;
|
||||
|
||||
void enable_run_once();
|
||||
|
||||
void add_benchmark(const std::string &name);
|
||||
void replay_global_args();
|
||||
|
||||
|
||||
@@ -385,6 +385,11 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
|
||||
this->print_list();
|
||||
std::exit(0);
|
||||
}
|
||||
else if (arg == "--run-once")
|
||||
{
|
||||
this->enable_run_once();
|
||||
first += 1;
|
||||
}
|
||||
else if (arg == "--quiet" || arg == "-q")
|
||||
{
|
||||
// Setting this flag prevents the default stdout printer from being
|
||||
@@ -542,6 +547,19 @@ void option_parser::print_help_axis() const
|
||||
fmt::print("{}\n", ::cli_help_axis_text);
|
||||
}
|
||||
|
||||
void option_parser::enable_run_once()
|
||||
{
|
||||
// If no active benchmark, save args as global.
|
||||
if (m_benchmarks.empty())
|
||||
{
|
||||
m_global_benchmark_args.push_back("--run-once");
|
||||
return;
|
||||
}
|
||||
|
||||
benchmark_base &bench = *m_benchmarks.back();
|
||||
bench.set_run_once(true);
|
||||
}
|
||||
|
||||
void option_parser::add_benchmark(const std::string &name)
|
||||
try
|
||||
{
|
||||
|
||||
@@ -150,6 +150,13 @@ struct state
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// If true, the benchmark is only run once, skipping all warmup runs and only
|
||||
/// executing a single non-batched measurement. This is intended for use with
|
||||
/// external profiling tools. @{
|
||||
[[nodiscard]] bool get_run_once() const { return m_run_once; }
|
||||
void set_run_once(bool v) { m_run_once = v; }
|
||||
/// @}
|
||||
|
||||
/// Accumulate at least this many seconds of timing data per measurement. @{
|
||||
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
|
||||
void set_min_time(nvbench::float64_t min_time) { m_min_time = min_time; }
|
||||
@@ -256,6 +263,8 @@ private:
|
||||
std::optional<nvbench::device_info> m_device;
|
||||
std::size_t m_type_config_index{};
|
||||
|
||||
bool m_run_once{false};
|
||||
|
||||
nvbench::int64_t m_min_samples;
|
||||
nvbench::float64_t m_min_time;
|
||||
nvbench::float64_t m_max_noise;
|
||||
|
||||
@@ -33,6 +33,7 @@ namespace nvbench
|
||||
|
||||
state::state(const benchmark_base &bench)
|
||||
: m_benchmark{bench}
|
||||
, m_run_once{bench.get_run_once()}
|
||||
, m_min_samples{bench.get_min_samples()}
|
||||
, m_min_time{bench.get_min_time()}
|
||||
, m_max_noise{bench.get_max_noise()}
|
||||
@@ -48,6 +49,7 @@ state::state(const benchmark_base &bench,
|
||||
, m_axis_values{std::move(values)}
|
||||
, m_device{std::move(device)}
|
||||
, m_type_config_index{type_config_index}
|
||||
, m_run_once{bench.get_run_once()}
|
||||
, m_min_samples{bench.get_min_samples()}
|
||||
, m_min_time{bench.get_min_time()}
|
||||
, m_max_noise{bench.get_max_noise()}
|
||||
|
||||
Reference in New Issue
Block a user