From e4057575c7134321ac0d2b676babc8d48e299787 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Thu, 24 Apr 2025 22:48:35 +0000 Subject: [PATCH] Disable throttling when `sync` exec tag is used. --- docs/cli_help.md | 5 ++++- nvbench/detail/measure_cold.cu | 3 ++- nvbench/detail/measure_cold.cuh | 5 +++-- nvbench/detail/state_exec.cuh | 4 +++- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/docs/cli_help.md b/docs/cli_help.md index 336a826..6021e0e 100644 --- a/docs/cli_help.md +++ b/docs/cli_help.md @@ -135,13 +135,16 @@ * `--throttle-threshold <value>` * Set the GPU throttle threshold as percentage of the device's default clock rate. - * Default is 75%. + * Default is 75. + * Set to 0 to disable throttle detection entirely. + * Note that throttle detection is disabled when `nvbench::exec_tag::sync` is used. * Applies to the most recent `--benchmark`, or all benchmarks if specified before any `--benchmark` arguments. * `--throttle-recovery-delay <value>` * Set the GPU throttle recovery delay in seconds. * Default is 0.05 seconds. + * Note that throttle detection is disabled when `nvbench::exec_tag::sync` is used. * Applies to the most recent `--benchmark`, or all benchmarks if specified before any `--benchmark` arguments. 
diff --git a/nvbench/detail/measure_cold.cu b/nvbench/detail/measure_cold.cu index 5e1c6c2..6f0f5dd 100644 --- a/nvbench/detail/measure_cold.cu +++ b/nvbench/detail/measure_cold.cu @@ -43,6 +43,7 @@ measure_cold_base::measure_cold_base(state &exec_state) exec_state.get_stopping_criterion())} , m_disable_blocking_kernel{exec_state.get_disable_blocking_kernel()} , m_run_once{exec_state.get_run_once()} + , m_check_throttling(!exec_state.get_run_once() && exec_state.get_throttle_threshold() > 0.f) , m_min_samples{exec_state.get_min_samples()} , m_skip_time{exec_state.get_skip_time()} , m_timeout{exec_state.get_timeout()} @@ -94,7 +95,7 @@ void measure_cold_base::run_trials_prologue() { m_walltime_timer.start(); } void measure_cold_base::record_measurements() { - if (!m_run_once) + if (m_check_throttling) { const auto current_clock_rate = m_gpu_frequency.get_clock_frequency(); const auto default_clock_rate = diff --git a/nvbench/detail/measure_cold.cuh b/nvbench/detail/measure_cold.cuh index 93bff0f..e3b329d 100644 --- a/nvbench/detail/measure_cold.cuh +++ b/nvbench/detail/measure_cold.cuh @@ -95,6 +95,7 @@ protected: bool m_disable_blocking_kernel{false}; bool m_run_once{false}; + bool m_check_throttling; nvbench::int64_t m_min_samples{}; @@ -142,7 +143,7 @@ struct measure_cold_base::kernel_launch_timer { m_measure.block_stream(); } - if (!m_measure.m_run_once) + if (m_measure.m_check_throttling) { m_measure.gpu_frequency_start(); } @@ -161,7 +162,7 @@ struct measure_cold_base::kernel_launch_timer m_measure.m_cpu_timer.start(); m_measure.unblock_stream(); } - if (!m_measure.m_run_once) + if (m_measure.m_check_throttling) { m_measure.gpu_frequency_stop(); } diff --git a/nvbench/detail/state_exec.cuh b/nvbench/detail/state_exec.cuh index a0311d8..bf4fb9e 100644 --- a/nvbench/detail/state_exec.cuh +++ b/nvbench/detail/state_exec.cuh @@ -101,10 +101,12 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) "`set_is_cpu_only(true)` is NOT called when defining 
the benchmark."); } - // Syncing will cause the blocking kernel pattern to deadlock: if constexpr (modifier_tags & sync) { + // Syncing will cause the blocking kernel pattern to deadlock: this->set_disable_blocking_kernel(true); + // Syncing will cause the throttle frequency measurements to be skewed heavily: + this->set_throttle_threshold(0.f); } if (this->is_skipped())