Merge pull request #211 from alliepiper/clock_api

Fetch clock rates using cudaDeviceGetAttribute.
This commit is contained in:
Allison Piper
2025-04-14 17:12:42 -04:00
committed by GitHub
2 changed files with 16 additions and 6 deletions

View File

@@ -44,6 +44,16 @@ device_info::device_info(int id)
, m_nvml_device(nullptr)
{
NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&m_prop, m_id));
+int val{};
+NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrClockRate, m_id));
+// kHz -> Hz
+m_sm_default_clock_rate = static_cast<std::size_t>(val) * 1000;
+NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrMemoryClockRate, m_id));
+// kHz -> Hz
+m_global_memory_bus_peak_clock_rate = static_cast<std::size_t>(val) * 1000;
// NVML's lifetime should extend for the entirety of the process, so store in a
// global.
[[maybe_unused]] static auto nvml_lifetime = nvbench::nvml::NVMLLifetimeManager();

View File

@@ -106,10 +106,7 @@ struct device_info
}
/// @return The default clock rate of the SM in Hz.
-[[nodiscard]] std::size_t get_sm_default_clock_rate() const
-{ // kHz -> Hz
-return static_cast<std::size_t>(m_prop.clockRate) * 1000;
-}
+[[nodiscard]] std::size_t get_sm_default_clock_rate() const { return m_sm_default_clock_rate; }
/// @return The number of physical streaming multiprocessors on this device.
[[nodiscard]] int get_number_of_sms() const { return m_prop.multiProcessorCount; }
@@ -143,8 +140,8 @@ struct device_info
/// @return The peak clock rate of the global memory bus in Hz.
[[nodiscard]] std::size_t get_global_memory_bus_peak_clock_rate() const
-{ // kHz -> Hz
-return static_cast<std::size_t>(m_prop.memoryClockRate) * 1000;
+{
+return m_global_memory_bus_peak_clock_rate;
}
/// @return The width of the global memory bus in bits.
@@ -200,6 +197,9 @@ private:
int m_id;
cudaDeviceProp m_prop;
nvmlDevice_st *m_nvml_device;
+std::size_t m_sm_default_clock_rate;
+std::size_t m_global_memory_bus_peak_clock_rate;
};
// get_ptx_version implementation; this needs to stay in the header so it will