Merge pull request #216 from alliepiper/disable_throttle_for_sync

Disable throttle detection when the `sync` exec tag is used.
This commit is contained in:
Allison Piper
2025-04-24 19:02:39 -04:00
committed by GitHub
4 changed files with 12 additions and 5 deletions

View File

@@ -135,13 +135,16 @@
* `--throttle-threshold <value>`
* Set the GPU throttle threshold as a percentage of the device's default clock rate.
* Default is 75%.
* Default is 75.
* Set to 0 to disable throttle detection entirely.
* Note that throttle detection is disabled when `nvbench::exec_tag::sync` is used.
* Applies to the most recent `--benchmark`, or all benchmarks if specified
before any `--benchmark` arguments.
* `--throttle-recovery-delay <value>`
* Set the GPU throttle recovery delay in seconds.
* Default is 0.05 seconds.
* Note that throttle detection is disabled when `nvbench::exec_tag::sync` is used.
* Applies to the most recent `--benchmark`, or all benchmarks if specified
before any `--benchmark` arguments.

View File

@@ -43,6 +43,7 @@ measure_cold_base::measure_cold_base(state &exec_state)
exec_state.get_stopping_criterion())}
, m_disable_blocking_kernel{exec_state.get_disable_blocking_kernel()}
, m_run_once{exec_state.get_run_once()}
, m_check_throttling(!exec_state.get_run_once() && exec_state.get_throttle_threshold() > 0.f)
, m_min_samples{exec_state.get_min_samples()}
, m_skip_time{exec_state.get_skip_time()}
, m_timeout{exec_state.get_timeout()}
@@ -94,7 +95,7 @@ void measure_cold_base::run_trials_prologue() { m_walltime_timer.start(); }
void measure_cold_base::record_measurements()
{
if (!m_run_once)
if (m_check_throttling)
{
const auto current_clock_rate = m_gpu_frequency.get_clock_frequency();
const auto default_clock_rate =

View File

@@ -95,6 +95,7 @@ protected:
bool m_disable_blocking_kernel{false};
bool m_run_once{false};
bool m_check_throttling;
nvbench::int64_t m_min_samples{};
@@ -142,7 +143,7 @@ struct measure_cold_base::kernel_launch_timer
{
m_measure.block_stream();
}
if (!m_measure.m_run_once)
if (m_measure.m_check_throttling)
{
m_measure.gpu_frequency_start();
}
@@ -161,7 +162,7 @@ struct measure_cold_base::kernel_launch_timer
m_measure.m_cpu_timer.start();
m_measure.unblock_stream();
}
if (!m_measure.m_run_once)
if (m_measure.m_check_throttling)
{
m_measure.gpu_frequency_stop();
}

View File

@@ -101,10 +101,12 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
"`set_is_cpu_only(true)` is NOT called when defining the benchmark.");
}
// Syncing will cause the blocking kernel pattern to deadlock:
if constexpr (modifier_tags & sync)
{
// Syncing will cause the blocking kernel pattern to deadlock:
this->set_disable_blocking_kernel(true);
// Syncing will cause the throttle frequency measurements to be skewed heavily:
this->set_throttle_threshold(0.f);
}
if (this->is_skipped())