mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
468 lines
15 KiB
Plaintext
468 lines
15 KiB
Plaintext
/*
|
|
* Copyright 2021 NVIDIA Corporation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
|
* (the "License"); you may not use this file except in compliance with
|
|
* the License.
|
|
*
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <nvbench/markdown_printer.cuh>
|
|
|
|
#include <nvbench/benchmark_base.cuh>
|
|
#include <nvbench/device_manager.cuh>
|
|
#include <nvbench/state.cuh>
|
|
#include <nvbench/summary.cuh>
|
|
|
|
#include <nvbench/internal/markdown_table.cuh>
|
|
|
|
#include <fmt/color.h>
|
|
#include <fmt/format.h>
|
|
|
|
#include <functional>
|
|
#include <numeric>
|
|
#include <ostream>
|
|
#include <string>
|
|
#include <type_traits>
|
|
#include <vector>
|
|
|
|
namespace nvbench
|
|
{
|
|
|
|
void markdown_printer::do_print_device_info()
|
|
{
|
|
fmt::memory_buffer buffer;
|
|
fmt::format_to(std::back_inserter(buffer), "# Devices\n\n");
|
|
|
|
const auto &device_mgr = nvbench::device_manager::get();
|
|
const auto &devices = device_mgr.get_number_of_used_devices() > 0 ? device_mgr.get_used_devices()
|
|
: device_mgr.get_devices();
|
|
for (const auto &device : devices)
|
|
{
|
|
const auto [gmem_free, gmem_used] = device.get_global_memory_usage();
|
|
|
|
fmt::format_to(std::back_inserter(buffer), "## [{}] `{}`\n", device.get_id(), device.get_name());
|
|
fmt::format_to(std::back_inserter(buffer),
|
|
"* SM Version: {} (PTX Version: {})\n",
|
|
device.get_sm_version(),
|
|
device.get_ptx_version());
|
|
fmt::format_to(std::back_inserter(buffer), "* Number of SMs: {}\n", device.get_number_of_sms());
|
|
fmt::format_to(std::back_inserter(buffer),
|
|
"* SM Default Clock Rate: {} MHz\n",
|
|
device.get_sm_default_clock_rate() / 1000 / 1000);
|
|
fmt::format_to(std::back_inserter(buffer),
|
|
"* Global Memory: {} MiB Free / {} MiB Total\n",
|
|
gmem_free / 1024 / 1024,
|
|
gmem_used / 1024 / 1024);
|
|
fmt::format_to(std::back_inserter(buffer),
|
|
"* Global Memory Bus Peak: {} GB/sec ({}-bit DDR @{}MHz)\n",
|
|
device.get_global_memory_bus_bandwidth() / 1000 / 1000 / 1000,
|
|
device.get_global_memory_bus_width(),
|
|
device.get_global_memory_bus_peak_clock_rate() / 1000 / 1000);
|
|
fmt::format_to(std::back_inserter(buffer),
|
|
"* Max Shared Memory: {} KiB/SM, {} KiB/Block\n",
|
|
device.get_shared_memory_per_sm() / 1024,
|
|
device.get_shared_memory_per_block() / 1024);
|
|
fmt::format_to(std::back_inserter(buffer), "* L2 Cache Size: {} KiB\n", device.get_l2_cache_size() / 1024);
|
|
fmt::format_to(std::back_inserter(buffer), "* Maximum Active Blocks: {}/SM\n", device.get_max_blocks_per_sm());
|
|
fmt::format_to(std::back_inserter(buffer),
|
|
"* Maximum Active Threads: {}/SM, {}/Block\n",
|
|
device.get_max_threads_per_sm(),
|
|
device.get_max_threads_per_block());
|
|
fmt::format_to(std::back_inserter(buffer),
|
|
"* Available Registers: {}/SM, {}/Block\n",
|
|
device.get_registers_per_sm(),
|
|
device.get_registers_per_block());
|
|
fmt::format_to(std::back_inserter(buffer), "* ECC Enabled: {}\n", device.get_ecc_state() ? "Yes" : "No");
|
|
fmt::format_to(std::back_inserter(buffer), "\n");
|
|
}
|
|
m_ostream << fmt::to_string(buffer);
|
|
}
|
|
|
|
// Starts the log section: writes a "# Log" heading, a blank line, and an
// opening code fence to m_ostream.
void markdown_printer::do_print_log_preamble()
{
  m_ostream << "# Log\n\n"
               "```\n";
}
|
|
|
|
// Ends the log section: writes a closing code fence followed by a blank line
// to m_ostream.
void markdown_printer::do_print_log_epilogue()
{
  m_ostream << "```\n"
               "\n";
}
|
|
|
|
// Writes one log line to m_ostream: a tag naming the log level (left-aligned
// in a five-character field), a single space, then `msg`.
//
// When m_color is set the tag is styled with a per-level foreground color on a
// bold black background; otherwise it is emitted with a default-constructed
// (empty) text style. A level that is not handled by the switch leaves the tag
// empty.
void markdown_printer::do_log(nvbench::log_level level, const std::string &msg)
{
  const fmt::text_style plain;
  const auto base = bg(fmt::color::black) | fmt::emphasis::bold;

  // Pads `label` to the tag width; `colored` is applied only when colors are
  // enabled.
  const auto make_tag = [this, &plain](const fmt::text_style &colored, const char *label) {
    return fmt::format(m_color ? colored : plain, "{:<5}", label);
  };

  std::string tag;
  switch (level)
  {
    case log_level::run:
      tag = make_tag(base | fg(fmt::color::white), "Run:");
      break;
    case log_level::pass:
      tag = make_tag(base | fg(fmt::color::dark_green), "Pass:");
      break;
    case log_level::fail:
      tag = make_tag(base | fg(fmt::color::red), "Fail:");
      break;
    case log_level::skip:
      tag = make_tag(base | fg(fmt::color::steel_blue), "Skip:");
      break;
    case log_level::warn:
      tag = make_tag(base | fg(fmt::rgb{160, 135, 0}), "Warn:"); // yellow
      break;
    case log_level::info:
      tag = make_tag(base | fg(fmt::color::light_gray), "Info:");
      break;
  }

  // std::endl is deliberate: flushing after every message is the only
  // user-visible indication that a benchmark is running.
  m_ostream << tag << " " << msg << std::endl;
}
|
|
|
|
// Logs, at the `run` level, the short description of the state that is about
// to execute. When m_total_state_count is non-zero the description is prefixed
// with "[current/total]" progress, where current is
// m_completed_state_count + 1.
void markdown_printer::do_log_run_state(const nvbench::state &exec_state)
{
  const auto description = exec_state.get_short_description(m_color);

  // No total is known: log the bare description without progress info.
  if (m_total_state_count == 0)
  {
    this->log(nvbench::log_level::run, description);
    return;
  }

  this->log(nvbench::log_level::run,
            fmt::format("[{}/{}] {}",
                        m_completed_state_count + 1,
                        m_total_state_count,
                        description));
}
|
|
|
|
// Writes a "# Benchmarks" markdown section to m_ostream. Each benchmark gets a
// "## [index] `name` (N configurations)" heading followed by an "### Axes"
// list; every axis is a bullet with its name, type and optional "[flags]", and
// every axis value is a nested bullet with its input string and optional
// "(description)".
//
// Nothing at all is written when `benches` is empty.
void markdown_printer::do_print_benchmark_list(const printer_base::benchmark_vector &benches)
{
  if (benches.empty())
  {
    return;
  }

  fmt::memory_buffer out;
  auto sink = std::back_inserter(out);
  fmt::format_to(sink, "# Benchmarks\n\n");

  std::size_t next_index = 0;
  for (const auto &bench : benches)
  {
    const auto &axis_list          = bench->get_axes().get_axes();
    const std::size_t config_count = bench->get_config_count();

    fmt::format_to(sink,
                   "## [{}] `{}` ({} configurations)\n\n",
                   next_index,
                   bench->get_name(),
                   config_count);
    ++next_index;

    fmt::format_to(sink, "### Axes\n\n");
    for (const auto &axis : axis_list)
    {
      // Flags are only shown (in square brackets) when the axis has any.
      const std::string raw_flags(axis->get_flags_as_string());
      const std::string flags_suffix = raw_flags.empty() ? std::string{}
                                                         : fmt::format(" [{}]", raw_flags);
      fmt::format_to(sink,
                     "* `{}` : {}{}\n",
                     axis->get_name(),
                     axis->get_type_as_string(),
                     flags_suffix);

      const std::size_t value_count = axis->get_size();
      for (std::size_t value_idx = 0; value_idx < value_count; ++value_idx)
      {
        // Descriptions are only shown (in parentheses) when non-empty.
        std::string description = axis->get_description(value_idx);
        if (!description.empty())
        {
          description = fmt::format(" ({})", description);
        }
        fmt::format_to(sink, "  * `{}`{}\n", axis->get_input_string(value_idx), description);
      } // end foreach value
    } // end foreach axis

    // Blank line after each benchmark's axis list.
    fmt::format_to(sink, "\n");
  } // end foreach bench

  m_ostream << fmt::to_string(out);
}
|
|
|
|
// Writes a "# Benchmark Results" markdown section to m_ostream.
//
// Each benchmark gets a "## name" heading. Below it, one markdown table is
// produced per device the benchmark lists (preceded by a "### [id] name"
// heading), or a single table without a device heading when the benchmark
// lists no devices. A table has one row per non-skipped state whose device
// compares equal to the device of the current pass; its columns are the
// state's axis values followed by the state's summaries that do not carry a
// "hide" value. When a table renders to an empty string, the text
// "No data -- check log." is written in its place.
void markdown_printer::do_print_benchmark_results(const printer_base::benchmark_vector &benches)
{
  // Converts the alternative currently held by an axis value to table text:
  // float64 uses the "{:.5g}" format, strings are returned unchanged.
  auto to_cell_text = [](const auto &held) {
    using held_t = std::decay_t<decltype(held)>;
    if constexpr (std::is_same_v<held_t, nvbench::float64_t>)
    {
      return fmt::format("{:.5g}", held);
    }
    else if constexpr (std::is_same_v<held_t, std::string>)
    {
      return held;
    }

    // warning C4702: unreachable code
    // This is a future-proofing fallback that's currently unused.
    NVBENCH_MSVC_PUSH_DISABLE_WARNING(4702)
    return fmt::format("{}", held);
  };
  NVBENCH_MSVC_POP_WARNING()

  // Start printing benchmarks
  fmt::memory_buffer out;
  auto sink = std::back_inserter(out);
  fmt::format_to(sink, "# Benchmark Results\n");

  for (const auto &bench_ptr : benches)
  {
    const auto &bench   = *bench_ptr;
    const auto &devices = bench.get_devices();
    const auto &axes    = bench.get_axes();

    fmt::format_to(sink, "\n## {}\n", bench.get_name());

    // Do a single pass when no devices are specified. This happens for
    // benchmarks with `cpu` exec_tags.
    const bool no_devices        = devices.empty();
    const std::size_t pass_count = no_devices ? 1 : devices.size();
    for (std::size_t pass = 0; pass < pass_count; ++pass)
    {
      // Stays empty for the device-less pass.
      std::optional<nvbench::device_info> device;
      if (!no_devices)
      {
        device.emplace(devices[pass]);
      }

      if (device)
      {
        fmt::format_to(sink, "\n### [{}] {}\n\n", device->get_id(), device->get_name());
      }

      std::size_t row = 0;
      nvbench::internal::markdown_table table{m_color};

      for (const auto &cur_state : bench.get_states())
      {
        // Only non-skipped states that belong to this pass's device get a row.
        if (cur_state.is_skipped() || !(cur_state.get_device() == device))
        {
          continue;
        }

        const auto &axis_values = cur_state.get_axis_values();
        for (const auto &name : axis_values.get_names())
        {
          const bool is_pow2_axis = axis_values.get_type(name) == named_values::type::int64 &&
                                    axes.get_int64_axis(name).is_power_of_two();
          if (!is_pow2_axis)
          {
            // NOTE(review): these columns are keyed by name + "_axis" while
            // power-of-two axes below are keyed by the bare name -- confirm
            // the asymmetry is intended.
            std::string text = std::visit(to_cell_text, axis_values.get_value(name));
            table.add_cell(row, name + "_axis", name, std::move(text));
            continue;
          }

          // Power-of-two int64 axes are shown as "2^exponent = value".
          const nvbench::int64_t value    = axis_values.get_int64(name);
          const nvbench::int64_t exponent = int64_axis::compute_log2(value);
          table.add_cell(row, name, name, fmt::format("2^{} = {}", exponent, value));
        }

        for (const auto &summ : cur_state.get_summaries())
        {
          if (summ.has_value("hide"))
          {
            continue;
          }

          // The column is keyed by the summary tag; its header is the "name"
          // value when the summary has one and the tag otherwise.
          const std::string &tag    = summ.get_tag();
          const std::string &header = summ.has_value("name") ? summ.get_string("name") : tag;
          const std::string hint = summ.has_value("hint") ? summ.get_string("hint") : std::string{};

          // The "hint" value selects the formatter; anything unrecognized
          // (including a missing hint) uses the default formatter.
          std::string text;
          if (hint == "duration")
          {
            text = this->do_format_duration(summ);
          }
          else if (hint == "item_rate")
          {
            text = this->do_format_item_rate(summ);
          }
          else if (hint == "bytes")
          {
            text = this->do_format_bytes(summ);
          }
          else if (hint == "byte_rate")
          {
            text = this->do_format_byte_rate(summ);
          }
          else if (hint == "sample_size")
          {
            text = this->do_format_sample_size(summ);
          }
          else if (hint == "percentage")
          {
            text = this->do_format_percentage(summ);
          }
          else
          {
            text = this->do_format_default(summ);
          }
          table.add_cell(row, tag, header, std::move(text));
        }

        ++row;
      }

      std::string rendered = table.to_string();
      if (rendered.empty())
      {
        rendered = "No data -- check log.\n";
      }
      fmt::format_to(sink, "{}", rendered);
    } // end foreach device_pass
  }

  m_ostream << fmt::to_string(out);
}
|
|
|
|
std::string markdown_printer::do_format_default(const summary &data)
|
|
{
|
|
auto format_visitor = [](const auto &v) {
|
|
using T = std::decay_t<decltype(v)>;
|
|
if constexpr (std::is_same_v<T, nvbench::float64_t>)
|
|
{
|
|
return fmt::format("{:.5g}", v);
|
|
}
|
|
else if constexpr (std::is_same_v<T, std::string>)
|
|
{
|
|
return v;
|
|
}
|
|
|
|
// warning C4702: unreachable code
|
|
// This is a future-proofing fallback that's currently unused.
|
|
NVBENCH_MSVC_PUSH_DISABLE_WARNING(4702)
|
|
return fmt::format("{}", v);
|
|
};
|
|
NVBENCH_MSVC_POP_WARNING()
|
|
|
|
return std::visit(format_visitor, data.get_value("value"));
|
|
}
|
|
|
|
std::string markdown_printer::do_format_duration(const summary &data)
|
|
{
|
|
const auto seconds = data.get_float64("value");
|
|
if (seconds >= 1.) // 1+ sec
|
|
{
|
|
return fmt::format("{:0.3f} s", seconds);
|
|
}
|
|
else if (seconds >= 1e-3) // 1+ ms.
|
|
{
|
|
return fmt::format("{:0.3f} ms", seconds * 1e3);
|
|
}
|
|
else if (seconds >= 1e-6) // 1+ us.
|
|
{
|
|
return fmt::format("{:0.3f} us", seconds * 1e6);
|
|
}
|
|
else
|
|
{
|
|
return fmt::format("{:0.3f} ns", seconds * 1e9);
|
|
}
|
|
}
|
|
|
|
std::string markdown_printer::do_format_item_rate(const summary &data)
|
|
{
|
|
const auto items_per_second = data.get_float64("value");
|
|
if (items_per_second >= 1e15)
|
|
{
|
|
return fmt::format("{:0.3f}P", items_per_second * 1e-15);
|
|
}
|
|
else if (items_per_second >= 1e12)
|
|
{
|
|
return fmt::format("{:0.3f}T", items_per_second * 1e-12);
|
|
}
|
|
else if (items_per_second >= 1e9)
|
|
{
|
|
return fmt::format("{:0.3f}G", items_per_second * 1e-9);
|
|
}
|
|
else if (items_per_second >= 1e6)
|
|
{
|
|
return fmt::format("{:0.3f}M", items_per_second * 1e-6);
|
|
}
|
|
else if (items_per_second >= 1e3)
|
|
{
|
|
return fmt::format("{:0.3f}K", items_per_second * 1e-3);
|
|
}
|
|
else
|
|
{
|
|
return fmt::format("{:0.3f}", items_per_second);
|
|
}
|
|
}
|
|
|
|
std::string markdown_printer::do_format_bytes(const summary &data)
|
|
{
|
|
const auto bytes = static_cast<nvbench::float64_t>(data.get_int64("value"));
|
|
if (bytes >= 1024. * 1024. * 1024.) // 1 GiB
|
|
{
|
|
return fmt::format("{:0.3f} GiB", bytes / (1024. * 1024. * 1024.));
|
|
}
|
|
else if (bytes >= 1024. * 1024.) // 1 MiB
|
|
{
|
|
return fmt::format("{:0.3f} MiB", bytes / (1024. * 1024.));
|
|
}
|
|
else if (bytes >= 1024) // 1 KiB.
|
|
{
|
|
return fmt::format("{:0.3f} KiB", bytes / 1024.);
|
|
}
|
|
else
|
|
{
|
|
return fmt::format("{:0.3f} B", static_cast<nvbench::float64_t>(bytes));
|
|
}
|
|
}
|
|
|
|
std::string markdown_printer::do_format_byte_rate(const summary &data)
|
|
{
|
|
const auto bytes_per_second = data.get_float64("value");
|
|
if (bytes_per_second >= 1e15)
|
|
{
|
|
return fmt::format("{:0.3f} PB/s", bytes_per_second * 1e-15);
|
|
}
|
|
else if (bytes_per_second >= 1e12)
|
|
{
|
|
return fmt::format("{:0.3f} TB/s", bytes_per_second * 1e-12);
|
|
}
|
|
else if (bytes_per_second >= 1e9)
|
|
{
|
|
return fmt::format("{:0.3f} GB/s", bytes_per_second * 1e-9);
|
|
}
|
|
else if (bytes_per_second >= 1e6)
|
|
{
|
|
return fmt::format("{:0.3f} MB/s", bytes_per_second * 1e-6);
|
|
}
|
|
else if (bytes_per_second >= 1e3)
|
|
{
|
|
return fmt::format("{:0.3f} KB/s", bytes_per_second * 1e-3);
|
|
}
|
|
else
|
|
{
|
|
return fmt::format("{:0.3f} B/s", bytes_per_second);
|
|
}
|
|
}
|
|
|
|
std::string markdown_printer::do_format_sample_size(const summary &data)
|
|
{
|
|
const auto count = data.get_int64("value");
|
|
return fmt::format("{}x", count);
|
|
}
|
|
|
|
std::string markdown_printer::do_format_percentage(const summary &data)
|
|
{
|
|
const auto percentage = data.get_float64("value");
|
|
return fmt::format("{:.2f}%", percentage * 100.);
|
|
}
|
|
|
|
} // namespace nvbench
|