Replace references to peak_sm_clock with default_sm_clock.

The measured clock speed can exceed the value previously called the "peak" SM clock, so calling it the "default" SM clock is less confusing.
2026-05-11 08:50:03 +00:00 · 2025-04-14 11:33:04 -04:00
parent 87dd03254f
commit 18926ced87
6 changed files with 8 additions and 8 deletions
--- a/docs/cli_help.md
+++ b/docs/cli_help.md
@@ -90,7 +90,7 @@
    before any `--benchmark` arguments.

 * `--stopping-criterion <criterion>`
-  * After `--min-samples` is satisfied, use `<criterion>` to detect if enough 
+  * After `--min-samples` is satisfied, use `<criterion>` to detect if enough
    samples were collected.
  * Only applies to Cold measurements.
  * Default is stdrel (`--stopping-criterion stdrel`)
@@ -134,7 +134,7 @@
    before any `--benchmark` arguments.

 * `--throttle-threshold <value>`
-  * Set the GPU throttle threshold as percentage of the peak clock rate.
+  * Set the GPU throttle threshold as a percentage of the device's default clock rate.
  * Default is 75%.
  * Applies to the most recent `--benchmark`, or all benchmarks if specified
    before any `--benchmark` arguments.
--- a/nvbench/benchmark_base.cuh
+++ b/nvbench/benchmark_base.cuh
@@ -302,7 +302,7 @@ protected:
  nvbench::float64_t m_skip_time{-1.};
  nvbench::float64_t m_timeout{15.};

-  nvbench::float32_t m_throttle_threshold{0.75f};      // [% of peak SM clock rate]
+  nvbench::float32_t m_throttle_threshold{0.75f};      // [% of default SM clock rate]
  nvbench::float32_t m_throttle_recovery_delay{0.05f}; // [seconds]

  nvbench::criterion_params m_criterion_params;
--- a/nvbench/detail/gpu_frequency.cuh
+++ b/nvbench/detail/gpu_frequency.cuh
@@ -40,7 +40,7 @@ struct gpu_frequency

  void stop(const nvbench::cuda_stream &stream) { m_stop.record(stream); }

-  [[nodiscard]] bool has_throttled(nvbench::float32_t peak_sm_clock_rate_hz,
+  [[nodiscard]] bool has_throttled(nvbench::float32_t default_sm_clock_rate_hz,
                                   nvbench::float32_t throttle_threshold);

  [[nodiscard]] nvbench::float32_t get_clock_frequency();
--- a/nvbench/detail/gpu_frequency.cxx
+++ b/nvbench/detail/gpu_frequency.cxx
@@ -31,10 +31,10 @@ nvbench::float32_t gpu_frequency::get_clock_frequency()
  return clock_rate;
 }

-bool gpu_frequency::has_throttled(nvbench::float32_t peak_sm_clock_rate_hz,
+bool gpu_frequency::has_throttled(nvbench::float32_t default_sm_clock_rate_hz,
                                  nvbench::float32_t throttle_threshold)
 {
-  float threshold = peak_sm_clock_rate_hz * throttle_threshold;
+  float threshold = default_sm_clock_rate_hz * throttle_threshold;

  if (this->get_clock_frequency() < threshold)
  {
--- a/nvbench/detail/measure_cold.cuh
+++ b/nvbench/detail/measure_cold.cuh
@@ -102,7 +102,7 @@ protected:
  nvbench::float64_t m_skip_time{};
  nvbench::float64_t m_timeout{};

-  nvbench::float32_t m_throttle_threshold;      // [% of peak SM clock rate]
+  nvbench::float32_t m_throttle_threshold;      // [% of default SM clock rate]
  nvbench::float32_t m_throttle_recovery_delay; // [seconds]

  nvbench::int64_t m_total_samples{};
--- a/nvbench/state.cuh
+++ b/nvbench/state.cuh
@@ -331,7 +331,7 @@ private:
  nvbench::float64_t m_skip_time;
  nvbench::float64_t m_timeout;

-  nvbench::float32_t m_throttle_threshold;      // [% of peak SM clock rate]
+  nvbench::float32_t m_throttle_threshold;      // [% of default SM clock rate]
  nvbench::float32_t m_throttle_recovery_delay; // [seconds]

  // Deadlock protection. See blocking_kernel's class doc for details.