mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-04-28 02:31:14 +00:00
Replace references to peak_sm_clock with default_sm_clock.
The clock speed actually measured at runtime can exceed this reported value, so calling it the "default" clock is less confusing than calling it the "peak" clock.
This commit is contained in:
@@ -90,7 +90,7 @@
|
|||||||
before any `--benchmark` arguments.
|
before any `--benchmark` arguments.
|
||||||
|
|
||||||
* `--stopping-criterion <criterion>`
|
* `--stopping-criterion <criterion>`
|
||||||
* After `--min-samples` is satisfied, use `<criterion>` to detect if enough
|
* After `--min-samples` is satisfied, use `<criterion>` to detect if enough
|
||||||
samples were collected.
|
samples were collected.
|
||||||
* Only applies to Cold measurements.
|
* Only applies to Cold measurements.
|
||||||
* Default is stdrel (`--stopping-criterion stdrel`)
|
* Default is stdrel (`--stopping-criterion stdrel`)
|
||||||
@@ -134,7 +134,7 @@
|
|||||||
before any `--benchmark` arguments.
|
before any `--benchmark` arguments.
|
||||||
|
|
||||||
* `--throttle-threshold <value>`
|
* `--throttle-threshold <value>`
|
||||||
* Set the GPU throttle threshold as percentage of the peak clock rate.
|
* Set the GPU throttle threshold as a percentage of the device's default SM clock rate.
|
||||||
* Default is 75%.
|
* Default is 75%.
|
||||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||||
before any `--benchmark` arguments.
|
before any `--benchmark` arguments.
|
||||||
|
|||||||
@@ -302,7 +302,7 @@ protected:
|
|||||||
nvbench::float64_t m_skip_time{-1.};
|
nvbench::float64_t m_skip_time{-1.};
|
||||||
nvbench::float64_t m_timeout{15.};
|
nvbench::float64_t m_timeout{15.};
|
||||||
|
|
||||||
nvbench::float32_t m_throttle_threshold{0.75f}; // [% of peak SM clock rate]
|
nvbench::float32_t m_throttle_threshold{0.75f}; // [% of default SM clock rate]
|
||||||
nvbench::float32_t m_throttle_recovery_delay{0.05f}; // [seconds]
|
nvbench::float32_t m_throttle_recovery_delay{0.05f}; // [seconds]
|
||||||
|
|
||||||
nvbench::criterion_params m_criterion_params;
|
nvbench::criterion_params m_criterion_params;
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ struct gpu_frequency
|
|||||||
|
|
||||||
void stop(const nvbench::cuda_stream &stream) { m_stop.record(stream); }
|
void stop(const nvbench::cuda_stream &stream) { m_stop.record(stream); }
|
||||||
|
|
||||||
[[nodiscard]] bool has_throttled(nvbench::float32_t peak_sm_clock_rate_hz,
|
[[nodiscard]] bool has_throttled(nvbench::float32_t default_sm_clock_rate_hz,
|
||||||
nvbench::float32_t throttle_threshold);
|
nvbench::float32_t throttle_threshold);
|
||||||
|
|
||||||
[[nodiscard]] nvbench::float32_t get_clock_frequency();
|
[[nodiscard]] nvbench::float32_t get_clock_frequency();
|
||||||
|
|||||||
@@ -31,10 +31,10 @@ nvbench::float32_t gpu_frequency::get_clock_frequency()
|
|||||||
return clock_rate;
|
return clock_rate;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool gpu_frequency::has_throttled(nvbench::float32_t peak_sm_clock_rate_hz,
|
bool gpu_frequency::has_throttled(nvbench::float32_t default_sm_clock_rate_hz,
|
||||||
nvbench::float32_t throttle_threshold)
|
nvbench::float32_t throttle_threshold)
|
||||||
{
|
{
|
||||||
float threshold = peak_sm_clock_rate_hz * throttle_threshold;
|
float threshold = default_sm_clock_rate_hz * throttle_threshold;
|
||||||
|
|
||||||
if (this->get_clock_frequency() < threshold)
|
if (this->get_clock_frequency() < threshold)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ protected:
|
|||||||
nvbench::float64_t m_skip_time{};
|
nvbench::float64_t m_skip_time{};
|
||||||
nvbench::float64_t m_timeout{};
|
nvbench::float64_t m_timeout{};
|
||||||
|
|
||||||
nvbench::float32_t m_throttle_threshold; // [% of peak SM clock rate]
|
nvbench::float32_t m_throttle_threshold; // [% of default SM clock rate]
|
||||||
nvbench::float32_t m_throttle_recovery_delay; // [seconds]
|
nvbench::float32_t m_throttle_recovery_delay; // [seconds]
|
||||||
|
|
||||||
nvbench::int64_t m_total_samples{};
|
nvbench::int64_t m_total_samples{};
|
||||||
|
|||||||
@@ -331,7 +331,7 @@ private:
|
|||||||
nvbench::float64_t m_skip_time;
|
nvbench::float64_t m_skip_time;
|
||||||
nvbench::float64_t m_timeout;
|
nvbench::float64_t m_timeout;
|
||||||
|
|
||||||
nvbench::float32_t m_throttle_threshold; // [% of peak SM clock rate]
|
nvbench::float32_t m_throttle_threshold; // [% of default SM clock rate]
|
||||||
nvbench::float32_t m_throttle_recovery_delay; // [seconds]
|
nvbench::float32_t m_throttle_recovery_delay; // [seconds]
|
||||||
|
|
||||||
// Deadlock protection. See blocking_kernel's class doc for details.
|
// Deadlock protection. See blocking_kernel's class doc for details.
|
||||||
|
|||||||
Reference in New Issue
Block a user