Add helper methods to configure throughput.

Instead of:

```
state.set_element_count(size);
state.set_global_memory_rw_bytes(
  size * (sizeof(InT) + sizeof(OutT)));
```

do:

```
state.add_element_count(size, "Elements");
state.add_global_memory_reads<InT>(size, "InputSize");
state.add_global_memory_writes<OutT>(size, "OutputSize");
```

The string arguments are optional. If provided, a new column will
be added to the output with the indicated name and number
of bytes (or elements for `add_element_count`).
This commit is contained in:
Allison Vacanti
2021-02-18 15:47:59 -05:00
parent dcd5d1ffa6
commit 7657036f9c
3 changed files with 69 additions and 15 deletions

View File

@@ -46,7 +46,7 @@ void measure_cold_base::check()
void measure_cold_base::generate_summaries()
{
const auto d_samples = static_cast<double>(m_total_samples);
const auto d_samples = static_cast<double>(m_total_samples);
{
auto &summ = m_state.add_summary("Number of Samples (Cold)");
summ.set_string("hint", "sample_size");
@@ -98,17 +98,17 @@ void measure_cold_base::generate_summaries()
summ.set_float64("value", m_cuda_noise);
}
if (const auto items = m_state.get_items_processed_per_launch(); items != 0)
if (const auto items = m_state.get_element_count(); items != 0)
{
auto &summ = m_state.add_summary("Element Throughput");
summ.set_string("hint", "item_rate");
summ.set_string("short_name", "Elem/s");
summ.set_string("description", "Number of input elements handled per second.");
summ.set_string("description",
"Number of input elements handled per second.");
summ.set_float64("value", static_cast<double>(items) / avg_cuda_time);
}
if (const auto bytes = m_state.get_global_bytes_accessed_per_launch();
bytes != 0)
if (const auto bytes = m_state.get_global_memory_rw_bytes(); bytes != 0)
{
const auto avg_used_gmem_bw = static_cast<double>(bytes) / avg_cuda_time;
{

View File

@@ -149,4 +149,40 @@ std::string state::get_short_description() const
return fmt::to_string(buffer);
}
// Adds `elements` to the running element count held in m_element_count.
//
// When `column_name` is non-empty, a summary named
// "Element count: <column_name>" is also registered; its "short_name" is the
// column name and its "value" is the number of elements passed to this call
// (not the accumulated total).
void state::add_element_count(std::size_t elements, std::string column_name)
{
  const auto num_elements = static_cast<nvbench::int64_t>(elements);
  m_element_count += num_elements;

  // No name was supplied: only the running total is updated.
  if (column_name.empty())
  {
    return;
  }

  // The summary title must be built before column_name is moved from.
  auto &column = this->add_summary("Element count: " + column_name);
  column.set_string("short_name", std::move(column_name));
  column.set_int64("value", num_elements);
}
// Adds `bytes` to the running byte total held in m_global_memory_rw_bytes.
//
// When `column_name` is non-empty, a summary named
// "Input Buffer Size: <column_name>" is also registered with hint "bytes";
// its "short_name" is the column name and its "value" is the byte count
// passed to this call (not the accumulated total).
void state::add_global_memory_reads(std::size_t bytes, std::string column_name)
{
  const auto num_bytes = static_cast<nvbench::int64_t>(bytes);
  m_global_memory_rw_bytes += num_bytes;

  // No name was supplied: only the running total is updated.
  if (column_name.empty())
  {
    return;
  }

  // The summary title must be built before column_name is moved from.
  auto &column = this->add_summary("Input Buffer Size: " + column_name);
  column.set_string("hint", "bytes");
  column.set_string("short_name", std::move(column_name));
  column.set_int64("value", num_bytes);
}
// Adds `bytes` to the running byte total held in m_global_memory_rw_bytes.
//
// When `column_name` is non-empty, a summary named
// "Output Buffer Size: <column_name>" is also registered with hint "bytes";
// its "short_name" is the column name and its "value" is the byte count
// passed to this call (not the accumulated total).
void state::add_global_memory_writes(std::size_t bytes, std::string column_name)
{
  const auto num_bytes = static_cast<nvbench::int64_t>(bytes);
  m_global_memory_rw_bytes += num_bytes;

  // No name was supplied: only the running total is updated.
  if (column_name.empty())
  {
    return;
  }

  // The summary title must be built before column_name is moved from.
  auto &column = this->add_summary("Output Buffer Size: " + column_name);
  column.set_string("hint", "bytes");
  column.set_string("short_name", std::move(column_name));
  column.set_int64("value", num_bytes);
}
} // namespace nvbench

View File

@@ -66,22 +66,40 @@ struct state
[[nodiscard]] const std::string &
get_string(const std::string &axis_name) const;
void set_items_processed_per_launch(nvbench::int64_t items)
void add_element_count(std::size_t elements, std::string column_name = {});
void set_element_count(nvbench::int64_t elements)
{
m_items_processed_per_launch = items;
m_element_count = elements;
}
[[nodiscard]] nvbench::int64_t get_items_processed_per_launch() const
[[nodiscard]] nvbench::int64_t get_element_count() const
{
return m_items_processed_per_launch;
return m_element_count;
}
void set_global_bytes_accessed_per_launch(nvbench::int64_t bytes)
template <typename ElementType>
void add_global_memory_reads(std::size_t count, std::string column_name = {})
{
m_global_bytes_accessed_per_launch = bytes;
this->add_global_memory_reads(count * sizeof(ElementType),
std::move(column_name));
}
[[nodiscard]] nvbench::int64_t get_global_bytes_accessed_per_launch() const
void add_global_memory_reads(std::size_t bytes, std::string column_name);
template <typename ElementType>
void add_global_memory_writes(std::size_t count, std::string column_name = {})
{
return m_global_bytes_accessed_per_launch;
this->add_global_memory_writes(count * sizeof(ElementType),
std::move(column_name));
}
void add_global_memory_writes(std::size_t bytes, std::string column_name);
/// Overwrites the stored global-memory byte total with `bytes`, discarding
/// whatever value was held before.
void set_global_memory_rw_bytes(nvbench::int64_t bytes)
{
  this->m_global_memory_rw_bytes = bytes;
}
/// Returns the global-memory byte total currently stored in this state.
[[nodiscard]] nvbench::int64_t get_global_memory_rw_bytes() const
{
  return this->m_global_memory_rw_bytes;
}
void skip(std::string reason) { m_skip_reason = std::move(reason); }
@@ -196,8 +214,8 @@ private:
std::vector<nvbench::summary> m_summaries;
std::string m_skip_reason;
nvbench::int64_t m_items_processed_per_launch{};
nvbench::int64_t m_global_bytes_accessed_per_launch{};
nvbench::int64_t m_element_count{};
nvbench::int64_t m_global_memory_rw_bytes{};
};
} // namespace nvbench