mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Merge pull request #85 from pauleonix/main
Add --disable-blocking-kernel and --profile options.
This commit is contained in:
@@ -131,3 +131,15 @@
|
||||
* Intended for use with external profiling tools.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--disable-blocking-kernel`
|
||||
* Don't use the `blocking_kernel`.
|
||||
* Intended for use with external profiling tools.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--profile`
|
||||
* Implies `--run-once` and `--disable-blocking-kernel`.
|
||||
* Intended for use with external profiling tools.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
@@ -193,6 +193,16 @@ struct benchmark_base
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// If true, the benchmark does not use the blocking_kernel. This is intended
|
||||
/// for use with external profiling tools. @{
|
||||
[[nodiscard]] bool get_disable_blocking_kernel() const
{
  // True when this benchmark is configured not to use the blocking_kernel.
  return m_disable_blocking_kernel;
}
|
||||
benchmark_base &set_disable_blocking_kernel(bool v)
{
  // Record the new setting; returning *this lets the call be chained with
  // other setters on the same benchmark.
  m_disable_blocking_kernel = v;
  return *this;
}
|
||||
/// @}
|
||||
|
||||
/// Accumulate at least this many seconds of timing data per measurement. @{
|
||||
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
|
||||
benchmark_base &set_min_time(nvbench::float64_t min_time)
|
||||
@@ -256,6 +266,7 @@ protected:
|
||||
optional_ref<nvbench::printer_base> m_printer;
|
||||
|
||||
bool m_run_once{false};
|
||||
bool m_disable_blocking_kernel{false};
|
||||
|
||||
nvbench::int64_t m_min_samples{10};
|
||||
nvbench::float64_t m_min_time{0.5};
|
||||
|
||||
@@ -83,6 +83,13 @@ __global__ void block_stream(const volatile nvbench::int32_t *flag,
|
||||
"The current timeout is set to %0.5g seconds.\n"
|
||||
"\n"
|
||||
"For more information, see the 'Benchmarks that sync' section of the\n"
|
||||
"NVBench documentation.\n"
|
||||
"\n"
|
||||
"If this happens while profiling with an external tool,\n"
|
||||
"pass the `--disable-blocking-kernel` flag or the `--profile` flag\n"
|
||||
"(to also only run the benchmark once) to the executable.\n"
|
||||
"\n"
|
||||
"For more information, see the 'Benchmark Properties' section of the\n"
|
||||
"NVBench documentation.\n\n",
|
||||
timeout);
|
||||
}
|
||||
|
||||
@@ -41,6 +41,7 @@ measure_cold_base::measure_cold_base(state &exec_state)
|
||||
: m_state{exec_state}
|
||||
, m_launch{m_state.get_cuda_stream()}
|
||||
, m_run_once{exec_state.get_run_once()}
|
||||
, m_no_block{exec_state.get_disable_blocking_kernel()}
|
||||
, m_min_samples{exec_state.get_min_samples()}
|
||||
, m_max_noise{exec_state.get_max_noise()}
|
||||
, m_min_time{exec_state.get_min_time()}
|
||||
|
||||
@@ -91,6 +91,7 @@ protected:
|
||||
nvbench::blocking_kernel m_blocker;
|
||||
|
||||
bool m_run_once{false};
|
||||
bool m_no_block{false};
|
||||
|
||||
nvbench::int64_t m_min_samples{};
|
||||
nvbench::float64_t m_max_noise{}; // rel stdev
|
||||
|
||||
@@ -64,6 +64,13 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!(modifier_tags & no_block) && this->get_disable_blocking_kernel())
|
||||
{
|
||||
constexpr auto no_block_tags = tags | no_block;
|
||||
this->exec(no_block_tags, std::forward<KernelLauncher>(kernel_launcher));
|
||||
return;
|
||||
}
|
||||
|
||||
// If no measurements selected, pick some defaults based on the modifiers:
|
||||
if constexpr (!measure_tags)
|
||||
{
|
||||
|
||||
@@ -443,6 +443,17 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
|
||||
this->enable_run_once();
|
||||
first += 1;
|
||||
}
|
||||
else if (arg == "--disable-blocking-kernel")
|
||||
{
|
||||
this->disable_blocking_kernel();
|
||||
first += 1;
|
||||
}
|
||||
else if (arg == "--profile")
|
||||
{
|
||||
this->enable_run_once();
|
||||
this->disable_blocking_kernel();
|
||||
first += 1;
|
||||
}
|
||||
// Either spelling selects this branch; use logical OR so the second
// comparison is skipped once the first one matches.
else if (arg == "--quiet" || arg == "-q")
|
||||
{
|
||||
// Setting this flag prevents the default stdout printer from being
|
||||
@@ -710,6 +721,19 @@ void option_parser::enable_run_once()
|
||||
bench.set_run_once(true);
|
||||
}
|
||||
|
||||
void option_parser::disable_blocking_kernel()
|
||||
{
|
||||
// If no active benchmark, save args as global.
|
||||
if (m_benchmarks.empty())
|
||||
{
|
||||
m_global_benchmark_args.push_back("--disable-blocking-kernel");
|
||||
return;
|
||||
}
|
||||
|
||||
benchmark_base &bench = *m_benchmarks.back();
|
||||
bench.set_disable_blocking_kernel(true);
|
||||
}
|
||||
|
||||
void option_parser::add_benchmark(const std::string &name)
|
||||
try
|
||||
{
|
||||
|
||||
@@ -94,6 +94,7 @@ private:
|
||||
void lock_gpu_clocks(const std::string &rate);
|
||||
|
||||
void enable_run_once();
|
||||
void disable_blocking_kernel();
|
||||
|
||||
void add_benchmark(const std::string &name);
|
||||
void replay_global_args();
|
||||
|
||||
@@ -167,6 +167,12 @@ struct state
|
||||
void set_run_once(bool v) { m_run_once = v; }
|
||||
/// @}
|
||||
|
||||
/// If true, the benchmark does not use the blocking_kernel. This is intended
|
||||
/// for use with external profiling tools. @{
|
||||
[[nodiscard]] bool get_disable_blocking_kernel() const
{
  // True when this state is configured not to use the blocking_kernel.
  return m_disable_blocking_kernel;
}
|
||||
void set_disable_blocking_kernel(bool v)
{
  // Overwrites the value reported by get_disable_blocking_kernel().
  m_disable_blocking_kernel = v;
}
|
||||
/// @}
|
||||
|
||||
/// Accumulate at least this many seconds of timing data per measurement. @{
|
||||
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
|
||||
void set_min_time(nvbench::float64_t min_time) { m_min_time = min_time; }
|
||||
@@ -322,6 +328,7 @@ private:
|
||||
std::size_t m_type_config_index{};
|
||||
|
||||
bool m_run_once{false};
|
||||
bool m_disable_blocking_kernel{false};
|
||||
|
||||
nvbench::int64_t m_min_samples;
|
||||
nvbench::float64_t m_min_time;
|
||||
|
||||
@@ -35,6 +35,7 @@ namespace nvbench
|
||||
state::state(const benchmark_base &bench)
|
||||
: m_benchmark{bench}
|
||||
, m_run_once{bench.get_run_once()}
|
||||
, m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
|
||||
, m_min_samples{bench.get_min_samples()}
|
||||
, m_min_time{bench.get_min_time()}
|
||||
, m_max_noise{bench.get_max_noise()}
|
||||
@@ -51,6 +52,7 @@ state::state(const benchmark_base &bench,
|
||||
, m_device{std::move(device)}
|
||||
, m_type_config_index{type_config_index}
|
||||
, m_run_once{bench.get_run_once()}
|
||||
, m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
|
||||
, m_min_samples{bench.get_min_samples()}
|
||||
, m_min_time{bench.get_min_time()}
|
||||
, m_max_noise{bench.get_max_noise()}
|
||||
|
||||
Reference in New Issue
Block a user