diff --git a/docs/cli_help.md b/docs/cli_help.md index 424c1be..336a826 100644 --- a/docs/cli_help.md +++ b/docs/cli_help.md @@ -90,7 +90,7 @@ before any `--benchmark` arguments. * `--stopping-criterion <criterion>` - * After `--min-samples` is satisfied, use `<criterion>` to detect if enough + * After `--min-samples` is satisfied, use `<criterion>` to detect if enough samples were collected. * Only applies to Cold measurements. * Default is stdrel (`--stopping-criterion stdrel`) @@ -134,7 +134,7 @@ before any `--benchmark` arguments. * `--throttle-threshold <value>` - * Set the GPU throttle threshold as percentage of the peak clock rate. + * Set the GPU throttle threshold as percentage of the device's default clock rate. * Default is 75%. * Applies to the most recent `--benchmark`, or all benchmarks if specified before any `--benchmark` arguments. diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index d35779c..58c55b6 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -302,7 +302,7 @@ protected: nvbench::float64_t m_skip_time{-1.}; nvbench::float64_t m_timeout{15.}; - nvbench::float32_t m_throttle_threshold{0.75f}; // [% of peak SM clock rate] + nvbench::float32_t m_throttle_threshold{0.75f}; // [% of default SM clock rate] nvbench::float32_t m_throttle_recovery_delay{0.05f}; // [seconds] nvbench::criterion_params m_criterion_params; diff --git a/nvbench/detail/gpu_frequency.cuh b/nvbench/detail/gpu_frequency.cuh index 320d3d2..dc0dcd7 100644 --- a/nvbench/detail/gpu_frequency.cuh +++ b/nvbench/detail/gpu_frequency.cuh @@ -40,7 +40,7 @@ struct gpu_frequency void stop(const nvbench::cuda_stream &stream) { m_stop.record(stream); } - [[nodiscard]] bool has_throttled(nvbench::float32_t peak_sm_clock_rate_hz, + [[nodiscard]] bool has_throttled(nvbench::float32_t default_sm_clock_rate_hz, nvbench::float32_t throttle_threshold); [[nodiscard]] nvbench::float32_t get_clock_frequency(); diff --git a/nvbench/detail/gpu_frequency.cxx b/nvbench/detail/gpu_frequency.cxx 
index 8f2d19b..adc1011 100644 --- a/nvbench/detail/gpu_frequency.cxx +++ b/nvbench/detail/gpu_frequency.cxx @@ -31,10 +31,10 @@ nvbench::float32_t gpu_frequency::get_clock_frequency() return clock_rate; } -bool gpu_frequency::has_throttled(nvbench::float32_t peak_sm_clock_rate_hz, +bool gpu_frequency::has_throttled(nvbench::float32_t default_sm_clock_rate_hz, nvbench::float32_t throttle_threshold) { - float threshold = peak_sm_clock_rate_hz * throttle_threshold; + float threshold = default_sm_clock_rate_hz * throttle_threshold; if (this->get_clock_frequency() < threshold) { diff --git a/nvbench/detail/measure_cold.cuh b/nvbench/detail/measure_cold.cuh index 80f8e5a..52da8f2 100644 --- a/nvbench/detail/measure_cold.cuh +++ b/nvbench/detail/measure_cold.cuh @@ -102,7 +102,7 @@ protected: nvbench::float64_t m_skip_time{}; nvbench::float64_t m_timeout{}; - nvbench::float32_t m_throttle_threshold; // [% of peak SM clock rate] + nvbench::float32_t m_throttle_threshold; // [% of default SM clock rate] nvbench::float32_t m_throttle_recovery_delay; // [seconds] nvbench::int64_t m_total_samples{}; diff --git a/nvbench/state.cuh b/nvbench/state.cuh index 9b0a5c1..0691dc6 100644 --- a/nvbench/state.cuh +++ b/nvbench/state.cuh @@ -331,7 +331,7 @@ private: nvbench::float64_t m_skip_time; nvbench::float64_t m_timeout; - nvbench::float32_t m_throttle_threshold; // [% of peak SM clock rate] + nvbench::float32_t m_throttle_threshold; // [% of default SM clock rate] nvbench::float32_t m_throttle_recovery_delay; // [seconds] // Deadlock protection. See blocking_kernel's class doc for details.