From 0c5631117401ff0b07f4ef4f2e4271def383eefe Mon Sep 17 00:00:00 2001
From: Allison Piper <alliepiper16@gmail.com>
Date: Mon, 14 Apr 2025 16:59:54 -0400
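Subject: [PATCH] Fetch clock rates using cudaDeviceGetAttribute.

Query cudaDevAttrClockRate and cudaDevAttrMemoryClockRate once in the
device_info constructor and cache the results (converted from kHz to Hz)
in new members, instead of reading cudaDeviceProp::clockRate and
cudaDeviceProp::memoryClockRate inside the getters.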
---
 nvbench/device_info.cu  | 10 ++++++++++
 nvbench/device_info.cuh | 12 ++++++------
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/nvbench/device_info.cu b/nvbench/device_info.cu
index b7a6c05..c19a6f2 100644
--- a/nvbench/device_info.cu
+++ b/nvbench/device_info.cu
@@ -44,6 +44,16 @@ device_info::device_info(int id)
     , m_nvml_device(nullptr)
 {
   NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&m_prop, m_id));
+
+  int val{};
+  NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrClockRate, m_id));
+  // cudaDevAttrClockRate is reported in kHz; store it in Hz.
+  m_sm_default_clock_rate = static_cast<std::size_t>(val) * 1000;
+
+  NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrMemoryClockRate, m_id));
+  // cudaDevAttrMemoryClockRate is reported in kHz; store it in Hz.
+  m_global_memory_bus_peak_clock_rate = static_cast<std::size_t>(val) * 1000;
+
   // NVML's lifetime should extend for the entirety of the process, so store in a
   // global.
   [[maybe_unused]] static auto nvml_lifetime = nvbench::nvml::NVMLLifetimeManager();
diff --git a/nvbench/device_info.cuh b/nvbench/device_info.cuh
index b31a096..cce5a88 100644
--- a/nvbench/device_info.cuh
+++ b/nvbench/device_info.cuh
@@ -106,10 +106,7 @@ struct device_info
   }
 
   /// @return The default clock rate of the SM in Hz.
-  [[nodiscard]] std::size_t get_sm_default_clock_rate() const
-  { // kHz -> Hz
-    return static_cast<std::size_t>(m_prop.clockRate) * 1000;
-  }
+  [[nodiscard]] std::size_t get_sm_default_clock_rate() const { return m_sm_default_clock_rate; }
 
   /// @return The number of physical streaming multiprocessors on this device.
   [[nodiscard]] int get_number_of_sms() const { return m_prop.multiProcessorCount; }
@@ -143,8 +140,8 @@ struct device_info
 
   /// @return The peak clock rate of the global memory bus in Hz.
   [[nodiscard]] std::size_t get_global_memory_bus_peak_clock_rate() const
-  { // kHz -> Hz
-    return static_cast<std::size_t>(m_prop.memoryClockRate) * 1000;
+  {
+    return m_global_memory_bus_peak_clock_rate;
   }
 
   /// @return The width of the global memory bus in bits.
@@ -200,6 +197,9 @@ private:
   int m_id;
   cudaDeviceProp m_prop;
   nvmlDevice_st *m_nvml_device;
+
+  std::size_t m_sm_default_clock_rate;
+  std::size_t m_global_memory_bus_peak_clock_rate;
 };
 
 // get_ptx_version implementation; this needs to stay in the header so it will