mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Merge pull request #211 from alliepiper/clock_api
Fetch clock rates using cudaDeviceGetAttribute.
This commit is contained in:
@@ -44,6 +44,16 @@ device_info::device_info(int id)
|
||||
, m_nvml_device(nullptr)
|
||||
{
|
||||
NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&m_prop, m_id));
|
||||
|
||||
int val{};
|
||||
NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrClockRate, m_id));
|
||||
// kHz -> Hz
|
||||
m_sm_default_clock_rate = static_cast<std::size_t>(val) * 1000;
|
||||
|
||||
NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrMemoryClockRate, m_id));
|
||||
// kHz -> Hz
|
||||
m_global_memory_bus_peak_clock_rate = static_cast<std::size_t>(val) * 1000;
|
||||
|
||||
// NVML's lifetime should extend for the entirety of the process, so store in a
|
||||
// global.
|
||||
[[maybe_unused]] static auto nvml_lifetime = nvbench::nvml::NVMLLifetimeManager();
|
||||
|
||||
@@ -106,10 +106,7 @@ struct device_info
|
||||
}
|
||||
|
||||
/// @return The default clock rate of the SM in Hz.
|
||||
[[nodiscard]] std::size_t get_sm_default_clock_rate() const
|
||||
{ // kHz -> Hz
|
||||
return static_cast<std::size_t>(m_prop.clockRate) * 1000;
|
||||
}
|
||||
[[nodiscard]] std::size_t get_sm_default_clock_rate() const { return m_sm_default_clock_rate; }
|
||||
|
||||
/// @return The number of physical streaming multiprocessors on this device.
|
||||
[[nodiscard]] int get_number_of_sms() const { return m_prop.multiProcessorCount; }
|
||||
@@ -143,8 +140,8 @@ struct device_info
|
||||
|
||||
/// @return The peak clock rate of the global memory bus in Hz.
|
||||
[[nodiscard]] std::size_t get_global_memory_bus_peak_clock_rate() const
|
||||
{ // kHz -> Hz
|
||||
return static_cast<std::size_t>(m_prop.memoryClockRate) * 1000;
|
||||
{
|
||||
return m_global_memory_bus_peak_clock_rate;
|
||||
}
|
||||
|
||||
/// @return The width of the global memory bus in bits.
|
||||
@@ -200,6 +197,9 @@ private:
|
||||
int m_id;
|
||||
cudaDeviceProp m_prop;
|
||||
nvmlDevice_st *m_nvml_device;
|
||||
|
||||
std::size_t m_sm_default_clock_rate;
|
||||
std::size_t m_global_memory_bus_peak_clock_rate;
|
||||
};
|
||||
|
||||
// get_ptx_version implementation; this needs to stay in the header so it will
|
||||
|
||||
Reference in New Issue
Block a user