Merge pull request #211 from alliepiper/clock_api

Fetch clock rates using cudaDeviceGetAttribute.
2026-03-14 20:27:24 +00:00 · 2025-04-14 17:12:42 -04:00
parent 33fc77aabc 0c56311174
commit eadb913322
2 changed files with 16 additions and 6 deletions
--- a/nvbench/device_info.cu
+++ b/nvbench/device_info.cu
@@ -44,6 +44,16 @@ device_info::device_info(int id)
    , m_nvml_device(nullptr)
 {
  NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&m_prop, m_id));
+
+  int val{};
+  NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrClockRate, m_id));
+  // cudaDevAttrClockRate is reported in kHz; convert to Hz.
+  m_sm_default_clock_rate = static_cast<std::size_t>(val) * 1000;
+
+  NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrMemoryClockRate, m_id));
+  // cudaDevAttrMemoryClockRate is reported in kHz; convert to Hz.
+  m_global_memory_bus_peak_clock_rate = static_cast<std::size_t>(val) * 1000;
+
  // NVML's lifetime should extend for the entirety of the process, so store in a
  // global.
  [[maybe_unused]] static auto nvml_lifetime = nvbench::nvml::NVMLLifetimeManager();
--- a/nvbench/device_info.cuh
+++ b/nvbench/device_info.cuh
@@ -106,10 +106,7 @@ struct device_info
  }

  /// @return The default clock rate of the SM in Hz.
-  [[nodiscard]] std::size_t get_sm_default_clock_rate() const
-  { // kHz -> Hz
-    return static_cast<std::size_t>(m_prop.clockRate) * 1000;
-  }
+  [[nodiscard]] std::size_t get_sm_default_clock_rate() const { return m_sm_default_clock_rate; }

  /// @return The number of physical streaming multiprocessors on this device.
  [[nodiscard]] int get_number_of_sms() const { return m_prop.multiProcessorCount; }
@@ -143,8 +140,8 @@ struct device_info

  /// @return The peak clock rate of the global memory bus in Hz.
  [[nodiscard]] std::size_t get_global_memory_bus_peak_clock_rate() const
-  { // kHz -> Hz
-    return static_cast<std::size_t>(m_prop.memoryClockRate) * 1000;
+  {
+    return m_global_memory_bus_peak_clock_rate;
  }

  /// @return The width of the global memory bus in bits.
@@ -200,6 +197,9 @@ private:
  int m_id;
  cudaDeviceProp m_prop;
  nvmlDevice_st *m_nvml_device;
+
+  std::size_t m_sm_default_clock_rate;
+  std::size_t m_global_memory_bus_peak_clock_rate;
 };

 // get_ptx_version implementation; this needs to stay in the header so it will