Replace references to peak_sm_clock with default_sm_clock.

The actual measured clock speed can exceed this value, so "default" is less confusing than "peak".
This commit is contained in:
Allison Piper
2025-04-14 11:33:04 -04:00
parent 87dd03254f
commit 18926ced87
6 changed files with 8 additions and 8 deletions

View File

@@ -90,7 +90,7 @@
before any `--benchmark` arguments. before any `--benchmark` arguments.
* `--stopping-criterion <criterion>` * `--stopping-criterion <criterion>`
* After `--min-samples` is satisfied, use `<criterion>` to detect if enough * After `--min-samples` is satisfied, use `<criterion>` to detect if enough
samples were collected. samples were collected.
* Only applies to Cold measurements. * Only applies to Cold measurements.
* Default is stdrel (`--stopping-criterion stdrel`) * Default is stdrel (`--stopping-criterion stdrel`)
@@ -134,7 +134,7 @@
before any `--benchmark` arguments. before any `--benchmark` arguments.
* `--throttle-threshold <value>` * `--throttle-threshold <value>`
* Set the GPU throttle threshold as a percentage of the peak clock rate. * Set the GPU throttle threshold as a percentage of the device's default clock rate.
* Default is 75%. * Default is 75%.
* Applies to the most recent `--benchmark`, or all benchmarks if specified * Applies to the most recent `--benchmark`, or all benchmarks if specified
before any `--benchmark` arguments. before any `--benchmark` arguments.

View File

@@ -302,7 +302,7 @@ protected:
nvbench::float64_t m_skip_time{-1.}; nvbench::float64_t m_skip_time{-1.};
nvbench::float64_t m_timeout{15.}; nvbench::float64_t m_timeout{15.};
nvbench::float32_t m_throttle_threshold{0.75f}; // [% of peak SM clock rate] nvbench::float32_t m_throttle_threshold{0.75f}; // [% of default SM clock rate]
nvbench::float32_t m_throttle_recovery_delay{0.05f}; // [seconds] nvbench::float32_t m_throttle_recovery_delay{0.05f}; // [seconds]
nvbench::criterion_params m_criterion_params; nvbench::criterion_params m_criterion_params;

View File

@@ -40,7 +40,7 @@ struct gpu_frequency
void stop(const nvbench::cuda_stream &stream) { m_stop.record(stream); } void stop(const nvbench::cuda_stream &stream) { m_stop.record(stream); }
[[nodiscard]] bool has_throttled(nvbench::float32_t peak_sm_clock_rate_hz, [[nodiscard]] bool has_throttled(nvbench::float32_t default_sm_clock_rate_hz,
nvbench::float32_t throttle_threshold); nvbench::float32_t throttle_threshold);
[[nodiscard]] nvbench::float32_t get_clock_frequency(); [[nodiscard]] nvbench::float32_t get_clock_frequency();

View File

@@ -31,10 +31,10 @@ nvbench::float32_t gpu_frequency::get_clock_frequency()
return clock_rate; return clock_rate;
} }
bool gpu_frequency::has_throttled(nvbench::float32_t peak_sm_clock_rate_hz, bool gpu_frequency::has_throttled(nvbench::float32_t default_sm_clock_rate_hz,
nvbench::float32_t throttle_threshold) nvbench::float32_t throttle_threshold)
{ {
float threshold = peak_sm_clock_rate_hz * throttle_threshold; float threshold = default_sm_clock_rate_hz * throttle_threshold;
if (this->get_clock_frequency() < threshold) if (this->get_clock_frequency() < threshold)
{ {

View File

@@ -102,7 +102,7 @@ protected:
nvbench::float64_t m_skip_time{}; nvbench::float64_t m_skip_time{};
nvbench::float64_t m_timeout{}; nvbench::float64_t m_timeout{};
nvbench::float32_t m_throttle_threshold; // [% of peak SM clock rate] nvbench::float32_t m_throttle_threshold; // [% of default SM clock rate]
nvbench::float32_t m_throttle_recovery_delay; // [seconds] nvbench::float32_t m_throttle_recovery_delay; // [seconds]
nvbench::int64_t m_total_samples{}; nvbench::int64_t m_total_samples{};

View File

@@ -331,7 +331,7 @@ private:
nvbench::float64_t m_skip_time; nvbench::float64_t m_skip_time;
nvbench::float64_t m_timeout; nvbench::float64_t m_timeout;
nvbench::float32_t m_throttle_threshold; // [% of peak SM clock rate] nvbench::float32_t m_throttle_threshold; // [% of default SM clock rate]
nvbench::float32_t m_throttle_recovery_delay; // [seconds] nvbench::float32_t m_throttle_recovery_delay; // [seconds]
// Deadlock protection. See blocking_kernel's class doc for details. // Deadlock protection. See blocking_kernel's class doc for details.