Merge pull request #85 from pauleonix/main

Add --disable-blocking-kernel and --profile options.
This commit is contained in:
Allison Vacanti
2022-04-26 13:23:58 -04:00
committed by GitHub
10 changed files with 73 additions and 0 deletions

View File

@@ -131,3 +131,15 @@
* Intended for use with external profiling tools.
* Applies to the most recent `--benchmark`, or all benchmarks if specified
before any `--benchmark` arguments.
* `--disable-blocking-kernel`
* Don't use the `blocking_kernel`.
* Intended for use with external profiling tools.
* Applies to the most recent `--benchmark`, or all benchmarks if specified
before any `--benchmark` arguments.
* `--profile`
* Implies `--run-once` and `--disable-blocking-kernel`.
* Intended for use with external profiling tools.
* Applies to the most recent `--benchmark`, or all benchmarks if specified
before any `--benchmark` arguments.

View File

@@ -193,6 +193,16 @@ struct benchmark_base
}
/// @}
/// Controls whether this benchmark skips the blocking_kernel. Skipping it is
/// intended for runs driven by external profiling tools. The setter returns
/// `*this` so calls can be chained. @{
[[nodiscard]] bool get_disable_blocking_kernel() const
{
  return m_disable_blocking_kernel;
}
benchmark_base &set_disable_blocking_kernel(bool v)
{
  m_disable_blocking_kernel = v;
  return *this;
}
/// @}
/// Accumulate at least this many seconds of timing data per measurement. @{
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
benchmark_base &set_min_time(nvbench::float64_t min_time)
@@ -256,6 +266,7 @@ protected:
optional_ref<nvbench::printer_base> m_printer;
bool m_run_once{false};
bool m_disable_blocking_kernel{false};
nvbench::int64_t m_min_samples{10};
nvbench::float64_t m_min_time{0.5};

View File

@@ -83,6 +83,13 @@ __global__ void block_stream(const volatile nvbench::int32_t *flag,
"The current timeout is set to %0.5g seconds.\n"
"\n"
"For more information, see the 'Benchmarks that sync' section of the\n"
"NVBench documentation.\n"
"\n"
"If this happens while profiling with an external tool,\n"
"pass the `--disable-blocking-kernel` flag or the `--profile` flag\n"
"(to also only run the benchmark once) to the executable.\n"
"\n"
"For more information, see the 'Benchmark Properties' section of the\n"
"NVBench documentation.\n\n",
timeout);
}

View File

@@ -41,6 +41,7 @@ measure_cold_base::measure_cold_base(state &exec_state)
: m_state{exec_state}
, m_launch{m_state.get_cuda_stream()}
, m_run_once{exec_state.get_run_once()}
, m_no_block{exec_state.get_disable_blocking_kernel()}
, m_min_samples{exec_state.get_min_samples()}
, m_max_noise{exec_state.get_max_noise()}
, m_min_time{exec_state.get_min_time()}

View File

@@ -91,6 +91,7 @@ protected:
nvbench::blocking_kernel m_blocker;
bool m_run_once{false};
bool m_no_block{false};
nvbench::int64_t m_min_samples{};
nvbench::float64_t m_max_noise{}; // rel stdev

View File

@@ -64,6 +64,13 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
return;
}
if (!(modifier_tags & no_block) && this->get_disable_blocking_kernel())
{
constexpr auto no_block_tags = tags | no_block;
this->exec(no_block_tags, std::forward<KernelLauncher>(kernel_launcher));
return;
}
// If no measurements selected, pick some defaults based on the modifiers:
if constexpr (!measure_tags)
{

View File

@@ -443,6 +443,17 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
this->enable_run_once();
first += 1;
}
else if (arg == "--disable-blocking-kernel")
{
  this->disable_blocking_kernel();
  first += 1;
}
else if (arg == "--profile")
{
  // `--profile` is shorthand for `--run-once` plus
  // `--disable-blocking-kernel`, so apply both before consuming the flag.
  this->enable_run_once();
  this->disable_blocking_kernel();
  first += 1;
}
// Logical `||` (not bitwise `|`): both operands are boolean comparisons, and
// the short-circuit form is the intended "either spelling" test.
else if (arg == "--quiet" || arg == "-q")
{
// Setting this flag prevents the default stdout printer from being
@@ -710,6 +721,19 @@ void option_parser::enable_run_once()
bench.set_run_once(true);
}
void option_parser::disable_blocking_kernel()
{
// If no active benchmark, save args as global.
if (m_benchmarks.empty())
{
m_global_benchmark_args.push_back("--disable-blocking-kernel");
return;
}
benchmark_base &bench = *m_benchmarks.back();
bench.set_disable_blocking_kernel(true);
}
void option_parser::add_benchmark(const std::string &name)
try
{

View File

@@ -94,6 +94,7 @@ private:
void lock_gpu_clocks(const std::string &rate);
void enable_run_once();
void disable_blocking_kernel();
void add_benchmark(const std::string &name);
void replay_global_args();

View File

@@ -167,6 +167,12 @@ struct state
void set_run_once(bool v) { m_run_once = v; }
/// @}
/// Controls whether this state's benchmark run skips the blocking_kernel.
/// Skipping it is intended for runs driven by external profiling tools. @{
[[nodiscard]] bool get_disable_blocking_kernel() const
{
  return m_disable_blocking_kernel;
}
void set_disable_blocking_kernel(bool v)
{
  m_disable_blocking_kernel = v;
}
/// @}
/// Accumulate at least this many seconds of timing data per measurement. @{
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
void set_min_time(nvbench::float64_t min_time) { m_min_time = min_time; }
@@ -322,6 +328,7 @@ private:
std::size_t m_type_config_index{};
bool m_run_once{false};
bool m_disable_blocking_kernel{false};
nvbench::int64_t m_min_samples;
nvbench::float64_t m_min_time;

View File

@@ -35,6 +35,7 @@ namespace nvbench
state::state(const benchmark_base &bench)
: m_benchmark{bench}
, m_run_once{bench.get_run_once()}
, m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
, m_min_samples{bench.get_min_samples()}
, m_min_time{bench.get_min_time()}
, m_max_noise{bench.get_max_noise()}
@@ -51,6 +52,7 @@ state::state(const benchmark_base &bench,
, m_device{std::move(device)}
, m_type_config_index{type_config_index}
, m_run_once{bench.get_run_once()}
, m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
, m_min_samples{bench.get_min_samples()}
, m_min_time{bench.get_min_time()}
, m_max_noise{bench.get_max_noise()}