mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Add helper methods to configure throughput.
Instead of:

```cpp
state.set_element_count(size);
state.set_global_memory_bytes_accessed(size * (sizeof(InT) + sizeof(OutT)));
```

do:

```cpp
state.add_element_count(size, "Elements");
state.add_global_memory_read<InT>(size, "InputSize");
state.add_global_memory_write<InT>(size, "OutputSize");
```

The string arguments are optional. If provided, a new column will be added to the output with the indicated name and number of bytes (or elements for `add_element_count`).
This commit is contained in:
@@ -46,7 +46,7 @@ void measure_cold_base::check()
 void measure_cold_base::generate_summaries()
 {
-  const auto d_samples = static_cast<double>(m_total_samples);
+  const auto d_samples = static_cast<double>(m_total_samples);
   {
     auto &summ = m_state.add_summary("Number of Samples (Cold)");
     summ.set_string("hint", "sample_size");
@@ -98,17 +98,17 @@ void measure_cold_base::generate_summaries()
     summ.set_float64("value", m_cuda_noise);
   }

-  if (const auto items = m_state.get_items_processed_per_launch(); items != 0)
+  if (const auto items = m_state.get_element_count(); items != 0)
   {
     auto &summ = m_state.add_summary("Element Throughput");
     summ.set_string("hint", "item_rate");
     summ.set_string("short_name", "Elem/s");
-    summ.set_string("description", "Number of input elements handled per second.");
+    summ.set_string("description",
+                    "Number of input elements handled per second.");
     summ.set_float64("value", static_cast<double>(items) / avg_cuda_time);
   }

-  if (const auto bytes = m_state.get_global_bytes_accessed_per_launch();
-      bytes != 0)
+  if (const auto bytes = m_state.get_global_memory_rw_bytes(); bytes != 0)
   {
     const auto avg_used_gmem_bw = static_cast<double>(bytes) / avg_cuda_time;
     {
||||
@@ -149,4 +149,40 @@ std::string state::get_short_description() const
   return fmt::to_string(buffer);
 }

+void state::add_element_count(std::size_t elements,
+                              std::string column_name)
+{
+  m_element_count += static_cast<nvbench::int64_t>(elements);
+  if (!column_name.empty())
+  {
+    auto &summ = this->add_summary("Element count: " + column_name);
+    summ.set_string("short_name", std::move(column_name));
+    summ.set_int64("value", static_cast<nvbench::int64_t>(elements));
+  }
+}
+
+void state::add_global_memory_reads(std::size_t bytes, std::string column_name)
+{
+  m_global_memory_rw_bytes += static_cast<nvbench::int64_t>(bytes);
+  if (!column_name.empty())
+  {
+    auto &summ = this->add_summary("Input Buffer Size: " + column_name);
+    summ.set_string("hint", "bytes");
+    summ.set_string("short_name", std::move(column_name));
+    summ.set_int64("value", static_cast<nvbench::int64_t>(bytes));
+  }
+}
+
+void state::add_global_memory_writes(std::size_t bytes, std::string column_name)
+{
+  m_global_memory_rw_bytes += static_cast<nvbench::int64_t>(bytes);
+  if (!column_name.empty())
+  {
+    auto &summ = this->add_summary("Output Buffer Size: " + column_name);
+    summ.set_string("hint", "bytes");
+    summ.set_string("short_name", std::move(column_name));
+    summ.set_int64("value", static_cast<nvbench::int64_t>(bytes));
+  }
+}
+
 } // namespace nvbench
||||
@@ -66,22 +66,40 @@ struct state
   [[nodiscard]] const std::string &
   get_string(const std::string &axis_name) const;

-  void set_items_processed_per_launch(nvbench::int64_t items)
+  void add_element_count(std::size_t elements, std::string column_name = {});
+
+  void set_element_count(nvbench::int64_t elements)
   {
-    m_items_processed_per_launch = items;
+    m_element_count = elements;
   }
-  [[nodiscard]] nvbench::int64_t get_items_processed_per_launch() const
+  [[nodiscard]] nvbench::int64_t get_element_count() const
   {
-    return m_items_processed_per_launch;
+    return m_element_count;
   }

-  void set_global_bytes_accessed_per_launch(nvbench::int64_t bytes)
+  template <typename ElementType>
+  void add_global_memory_reads(std::size_t count, std::string column_name = {})
   {
-    m_global_bytes_accessed_per_launch = bytes;
+    this->add_global_memory_reads(count * sizeof(ElementType),
+                                  std::move(column_name));
   }
-  [[nodiscard]] nvbench::int64_t get_global_bytes_accessed_per_launch() const
+  void add_global_memory_reads(std::size_t bytes, std::string column_name);
+
+  template <typename ElementType>
+  void add_global_memory_writes(std::size_t count, std::string column_name = {})
   {
-    return m_global_bytes_accessed_per_launch;
+    this->add_global_memory_writes(count * sizeof(ElementType),
+                                   std::move(column_name));
   }
+  void add_global_memory_writes(std::size_t bytes, std::string column_name);
+
+  void set_global_memory_rw_bytes(nvbench::int64_t bytes)
+  {
+    m_global_memory_rw_bytes = bytes;
+  }
+  [[nodiscard]] nvbench::int64_t get_global_memory_rw_bytes() const
+  {
+    return m_global_memory_rw_bytes;
+  }

   void skip(std::string reason) { m_skip_reason = std::move(reason); }

@@ -196,8 +214,8 @@ private:

   std::vector<nvbench::summary> m_summaries;
   std::string m_skip_reason;
-  nvbench::int64_t m_items_processed_per_launch{};
-  nvbench::int64_t m_global_bytes_accessed_per_launch{};
+  nvbench::int64_t m_element_count{};
+  nvbench::int64_t m_global_memory_rw_bytes{};
 };

 } // namespace nvbench
||||
Reference in New Issue
Block a user