// Patch summary (git diff touching nvbench/detail/measure_cold.cu,
// nvbench/state.cu and nvbench/state.cuh).
//
// nvbench/state.cuh (struct state):
//   * set_/get_items_processed_per_launch are replaced by
//     set_element_count / get_element_count, backed by the renamed member
//     m_element_count.
//   * set_/get_global_bytes_accessed_per_launch are replaced by
//     set_global_memory_rw_bytes / get_global_memory_rw_bytes, backed by the
//     renamed member m_global_memory_rw_bytes.
//   * New declarations: add_element_count(elements, column_name = {}),
//     add_global_memory_reads(bytes, column_name) and
//     add_global_memory_writes(bytes, column_name).
//   * New template overloads of add_global_memory_reads/writes take an element
//     count, multiply it by sizeof(ElementType) and forward the byte total
//     (plus the moved column_name) to the byte-based overload.
//
// nvbench/state.cu (new out-of-line definitions):
//   * add_element_count adds `elements` to m_element_count. When column_name
//     is non-empty it also adds a summary named "Element count: <column_name>"
//     with "short_name" = column_name and "value" = elements.
//   * add_global_memory_reads and add_global_memory_writes both add `bytes` to
//     the same counter, m_global_memory_rw_bytes. When column_name is
//     non-empty they add a summary named "Input Buffer Size: <column_name>" /
//     "Output Buffer Size: <column_name>" with "hint" = "bytes",
//     "short_name" = column_name and "value" = bytes.
//   * In all three, the summary name is built from column_name before
//     column_name is moved into the "short_name" field.
//
// nvbench/detail/measure_cold.cu (measure_cold_base::generate_summaries):
//   * The "Element Throughput" summary now reads m_state.get_element_count().
//   * The bandwidth section now reads m_state.get_global_memory_rw_bytes().
//   * Both are still skipped when the value is 0.
//
// NOTE(review): this copy of the patch looks damaged by extraction -- confirm
// against the original commit before applying:
//   * The diff's line breaks are collapsed, so hunk headers, context lines and
//     +/- lines run together on three physical lines.
//   * Angle-bracketed text appears to be missing: `static_cast(...)` has no
//     target type, `template` has no parameter list, and `std::vector` has no
//     element type. As a result the removed and added `d_samples` lines in the
//     first hunk read identically here; presumably they differed only in the
//     stripped cast type.
diff --git a/nvbench/detail/measure_cold.cu b/nvbench/detail/measure_cold.cu index 09558ce..ad3366e 100644 --- a/nvbench/detail/measure_cold.cu +++ b/nvbench/detail/measure_cold.cu @@ -46,7 +46,7 @@ void measure_cold_base::check() void measure_cold_base::generate_summaries() { - const auto d_samples = static_cast(m_total_samples); + const auto d_samples = static_cast(m_total_samples); { auto &summ = m_state.add_summary("Number of Samples (Cold)"); summ.set_string("hint", "sample_size"); @@ -98,17 +98,17 @@ void measure_cold_base::generate_summaries() summ.set_float64("value", m_cuda_noise); } - if (const auto items = m_state.get_items_processed_per_launch(); items != 0) + if (const auto items = m_state.get_element_count(); items != 0) { auto &summ = m_state.add_summary("Element Throughput"); summ.set_string("hint", "item_rate"); summ.set_string("short_name", "Elem/s"); - summ.set_string("description", "Number of input elements handled per second."); + summ.set_string("description", + "Number of input elements handled per second."); summ.set_float64("value", static_cast(items) / avg_cuda_time); } - if (const auto bytes = m_state.get_global_bytes_accessed_per_launch(); - bytes != 0) + if (const auto bytes = m_state.get_global_memory_rw_bytes(); bytes != 0) { const auto avg_used_gmem_bw = static_cast(bytes) / avg_cuda_time; { diff --git a/nvbench/state.cu b/nvbench/state.cu index a3361ea..352c617 100644 --- a/nvbench/state.cu +++ b/nvbench/state.cu @@ -149,4 +149,40 @@ std::string state::get_short_description() const return fmt::to_string(buffer); } +void state::add_element_count(std::size_t elements, + std::string column_name) +{ + m_element_count += static_cast(elements); + if (!column_name.empty()) + { + auto &summ = this->add_summary("Element count: " + column_name); + summ.set_string("short_name", std::move(column_name)); + summ.set_int64("value", static_cast(elements)); + } +} + +void state::add_global_memory_reads(std::size_t bytes, std::string column_name) +{ 
+ m_global_memory_rw_bytes += static_cast(bytes); + if (!column_name.empty()) + { + auto &summ = this->add_summary("Input Buffer Size: " + column_name); + summ.set_string("hint", "bytes"); + summ.set_string("short_name", std::move(column_name)); + summ.set_int64("value", static_cast(bytes)); + } +} + +void state::add_global_memory_writes(std::size_t bytes, std::string column_name) +{ + m_global_memory_rw_bytes += static_cast(bytes); + if (!column_name.empty()) + { + auto &summ = this->add_summary("Output Buffer Size: " + column_name); + summ.set_string("hint", "bytes"); + summ.set_string("short_name", std::move(column_name)); + summ.set_int64("value", static_cast(bytes)); + } +} + } // namespace nvbench diff --git a/nvbench/state.cuh b/nvbench/state.cuh index fe568a2..1d53692 100644 --- a/nvbench/state.cuh +++ b/nvbench/state.cuh @@ -66,22 +66,40 @@ struct state [[nodiscard]] const std::string & get_string(const std::string &axis_name) const; - void set_items_processed_per_launch(nvbench::int64_t items) + void add_element_count(std::size_t elements, std::string column_name = {}); + + void set_element_count(nvbench::int64_t elements) { - m_items_processed_per_launch = items; + m_element_count = elements; } - [[nodiscard]] nvbench::int64_t get_items_processed_per_launch() const + [[nodiscard]] nvbench::int64_t get_element_count() const { - return m_items_processed_per_launch; + return m_element_count; } - void set_global_bytes_accessed_per_launch(nvbench::int64_t bytes) + template + void add_global_memory_reads(std::size_t count, std::string column_name = {}) { - m_global_bytes_accessed_per_launch = bytes; + this->add_global_memory_reads(count * sizeof(ElementType), + std::move(column_name)); } - [[nodiscard]] nvbench::int64_t get_global_bytes_accessed_per_launch() const + void add_global_memory_reads(std::size_t bytes, std::string column_name); + + template + void add_global_memory_writes(std::size_t count, std::string column_name = {}) { - return 
m_global_bytes_accessed_per_launch; + this->add_global_memory_writes(count * sizeof(ElementType), + std::move(column_name)); + } + void add_global_memory_writes(std::size_t bytes, std::string column_name); + + void set_global_memory_rw_bytes(nvbench::int64_t bytes) + { + m_global_memory_rw_bytes = bytes; + } + [[nodiscard]] nvbench::int64_t get_global_memory_rw_bytes() const + { + return m_global_memory_rw_bytes; } void skip(std::string reason) { m_skip_reason = std::move(reason); } @@ -196,8 +214,8 @@ private: std::vector m_summaries; std::string m_skip_reason; - nvbench::int64_t m_items_processed_per_launch{}; - nvbench::int64_t m_global_bytes_accessed_per_launch{}; + nvbench::int64_t m_element_count{}; + nvbench::int64_t m_global_memory_rw_bytes{}; }; } // namespace nvbench