From 0c5631117401ff0b07f4ef4f2e4271def383eefe Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Mon, 14 Apr 2025 16:59:54 -0400 Subject: [PATCH] Fetch clock rates using cudaDeviceGetAttribute. --- nvbench/device_info.cu | 10 ++++++++++ nvbench/device_info.cuh | 12 ++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/nvbench/device_info.cu b/nvbench/device_info.cu index b7a6c05..c19a6f2 100644 --- a/nvbench/device_info.cu +++ b/nvbench/device_info.cu @@ -44,6 +44,16 @@ device_info::device_info(int id) , m_nvml_device(nullptr) { NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&m_prop, m_id)); + + int val{}; + NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrClockRate, m_id)); + // kHz -> Hz + m_sm_default_clock_rate = static_cast<std::size_t>(val) * 1000; + + NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrMemoryClockRate, m_id)); + // kHz -> Hz + m_global_memory_bus_peak_clock_rate = static_cast<std::size_t>(val) * 1000; + // NVML's lifetime should extend for the entirety of the process, so store in a // global. [[maybe_unused]] static auto nvml_lifetime = nvbench::nvml::NVMLLifetimeManager(); diff --git a/nvbench/device_info.cuh b/nvbench/device_info.cuh index b31a096..cce5a88 100644 --- a/nvbench/device_info.cuh +++ b/nvbench/device_info.cuh @@ -106,10 +106,7 @@ struct device_info } /// @return The default clock rate of the SM in Hz. - [[nodiscard]] std::size_t get_sm_default_clock_rate() const - { // kHz -> Hz - return static_cast<std::size_t>(m_prop.clockRate) * 1000; - } + [[nodiscard]] std::size_t get_sm_default_clock_rate() const { return m_sm_default_clock_rate; } /// @return The number of physical streaming multiprocessors on this device. [[nodiscard]] int get_number_of_sms() const { return m_prop.multiProcessorCount; } @@ -143,8 +140,8 @@ struct device_info /// @return The peak clock rate of the global memory bus in Hz. 
[[nodiscard]] std::size_t get_global_memory_bus_peak_clock_rate() const - { // kHz -> Hz - return static_cast<std::size_t>(m_prop.memoryClockRate) * 1000; + { + return m_global_memory_bus_peak_clock_rate; } /// @return The width of the global memory bus in bits. @@ -200,6 +197,9 @@ private: int m_id; cudaDeviceProp m_prop; nvmlDevice_st *m_nvml_device; + + std::size_t m_sm_default_clock_rate; + std::size_t m_global_memory_bus_peak_clock_rate; }; // get_ptx_version implementation; this needs to stay in the header so it will