mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Disable throttling when sync exec tag is used.
This commit is contained in:
@@ -135,13 +135,16 @@
|
||||
|
||||
* `--throttle-threshold <value>`
|
||||
* Set the GPU throttle threshold as a percentage of the device's default clock rate.
|
||||
* Default is 75%.
|
||||
* Default is 75.
|
||||
* Set to 0 to disable throttle detection entirely.
|
||||
* Note that throttle detection is disabled when `nvbench::exec_tag::sync` is used.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--throttle-recovery-delay <value>`
|
||||
* Set the GPU throttle recovery delay in seconds.
|
||||
* Default is 0.05 seconds.
|
||||
* Note that throttle detection is disabled when `nvbench::exec_tag::sync` is used.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
|
||||
@@ -43,6 +43,7 @@ measure_cold_base::measure_cold_base(state &exec_state)
|
||||
exec_state.get_stopping_criterion())}
|
||||
, m_disable_blocking_kernel{exec_state.get_disable_blocking_kernel()}
|
||||
, m_run_once{exec_state.get_run_once()}
|
||||
, m_check_throttling(!exec_state.get_run_once() && exec_state.get_throttle_threshold() > 0.f)
|
||||
, m_min_samples{exec_state.get_min_samples()}
|
||||
, m_skip_time{exec_state.get_skip_time()}
|
||||
, m_timeout{exec_state.get_timeout()}
|
||||
@@ -94,7 +95,7 @@ void measure_cold_base::run_trials_prologue() { m_walltime_timer.start(); }
|
||||
|
||||
void measure_cold_base::record_measurements()
|
||||
{
|
||||
if (!m_run_once)
|
||||
if (m_check_throttling)
|
||||
{
|
||||
const auto current_clock_rate = m_gpu_frequency.get_clock_frequency();
|
||||
const auto default_clock_rate =
|
||||
|
||||
@@ -95,6 +95,7 @@ protected:
|
||||
|
||||
bool m_disable_blocking_kernel{false};
|
||||
bool m_run_once{false};
|
||||
bool m_check_throttling;
|
||||
|
||||
nvbench::int64_t m_min_samples{};
|
||||
|
||||
@@ -142,7 +143,7 @@ struct measure_cold_base::kernel_launch_timer
|
||||
{
|
||||
m_measure.block_stream();
|
||||
}
|
||||
if (!m_measure.m_run_once)
|
||||
if (m_measure.m_check_throttling)
|
||||
{
|
||||
m_measure.gpu_frequency_start();
|
||||
}
|
||||
@@ -161,7 +162,7 @@ struct measure_cold_base::kernel_launch_timer
|
||||
m_measure.m_cpu_timer.start();
|
||||
m_measure.unblock_stream();
|
||||
}
|
||||
if (!m_measure.m_run_once)
|
||||
if (m_measure.m_check_throttling)
|
||||
{
|
||||
m_measure.gpu_frequency_stop();
|
||||
}
|
||||
|
||||
@@ -101,10 +101,12 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
|
||||
"`set_is_cpu_only(true)` is NOT called when defining the benchmark.");
|
||||
}
|
||||
|
||||
// Syncing will cause the blocking kernel pattern to deadlock:
|
||||
if constexpr (modifier_tags & sync)
|
||||
{
|
||||
// Syncing will cause the blocking kernel pattern to deadlock:
|
||||
this->set_disable_blocking_kernel(true);
|
||||
// Syncing will cause the throttle frequency measurements to be skewed heavily:
|
||||
this->set_throttle_threshold(0.f);
|
||||
}
|
||||
|
||||
if (this->is_skipped())
|
||||
|
||||
Reference in New Issue
Block a user