mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Execute benchmarks on all devices.
This commit is contained in:
@@ -1,8 +1,14 @@
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
|
||||
#include <nvbench/device_manager.cuh>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
/// Default constructor. Seeds the benchmark's device list with whatever
/// nvbench::device_manager::get().get_devices() returns.
benchmark_base::benchmark_base()
    : m_devices(nvbench::device_manager::get().get_devices())
{
}
|
||||
|
||||
benchmark_base::~benchmark_base() = default;
|
||||
|
||||
std::unique_ptr<benchmark_base> benchmark_base::clone() const
|
||||
@@ -16,5 +22,20 @@ std::unique_ptr<benchmark_base> benchmark_base::clone() const
|
||||
return std::move(result);
|
||||
}
|
||||
|
||||
void benchmark_base::set_devices(std::vector<int> device_ids)
|
||||
{
|
||||
std::vector<device_info> devices;
|
||||
devices.reserve(device_ids.size());
|
||||
for (int dev_id : device_ids)
|
||||
{
|
||||
devices.emplace_back(dev_id);
|
||||
}
|
||||
this->set_devices(std::move(devices));
|
||||
}
|
||||
|
||||
void benchmark_base::add_device(int device_id)
|
||||
{
|
||||
this->add_device(device_info{device_id});
|
||||
}
|
||||
|
||||
} // namespace nvbench
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/axes_metadata.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
|
||||
#include <memory>
|
||||
@@ -21,6 +22,7 @@ struct runner;
|
||||
*/
|
||||
struct benchmark_base
|
||||
{
|
||||
benchmark_base();
|
||||
virtual ~benchmark_base();
|
||||
|
||||
/**
|
||||
@@ -77,25 +79,37 @@ struct benchmark_base
|
||||
return *this;
|
||||
}
|
||||
|
||||
[[nodiscard]] nvbench::axes_metadata &get_axes()
|
||||
void set_devices(std::vector<int> device_ids);
|
||||
|
||||
/// Replaces the benchmark's device list with `devices`.
///
/// @param devices New device list; taken by value and moved into place.
void set_devices(std::vector<nvbench::device_info> devices)
{
  m_devices = std::move(devices);
}
|
||||
|
||||
void add_device(int device_id);
|
||||
|
||||
/// Appends `device` to the end of the benchmark's device list.
///
/// @param device Device to add; taken by value and moved into the list.
void add_device(nvbench::device_info device)
{
  m_devices.emplace_back(std::move(device));
}
|
||||
|
||||
[[nodiscard]] const std::vector<nvbench::device_info> &get_devices() const
|
||||
{
|
||||
return m_devices;
|
||||
}
|
||||
|
||||
[[nodiscard]] nvbench::axes_metadata &get_axes() { return m_axes; }
|
||||
|
||||
[[nodiscard]] const nvbench::axes_metadata &get_axes() const
|
||||
{
|
||||
return m_axes;
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::vector<std::vector<nvbench::state>> &
|
||||
get_states() const
|
||||
{
|
||||
return m_states;
|
||||
}
|
||||
[[nodiscard]] std::vector<std::vector<nvbench::state>> &get_states()
|
||||
[[nodiscard]] const std::vector<nvbench::state> &get_states() const
|
||||
{
|
||||
return m_states;
|
||||
}
|
||||
[[nodiscard]] std::vector<nvbench::state> &get_states() { return m_states; }
|
||||
|
||||
void run() { this->do_run(); }
|
||||
|
||||
@@ -105,7 +119,8 @@ protected:
|
||||
|
||||
std::string m_name;
|
||||
nvbench::axes_metadata m_axes;
|
||||
std::vector<std::vector<nvbench::state>> m_states;
|
||||
std::vector<nvbench::device_info> m_devices;
|
||||
std::vector<nvbench::state> m_states;
|
||||
|
||||
private:
|
||||
// route these through virtuals so the templated subclass can inject type info
|
||||
|
||||
@@ -218,6 +218,8 @@ void markdown_format::print_benchmark_summaries(
|
||||
|
||||
void markdown_format::print_benchmark_results(const benchmark_vector &benchmarks)
|
||||
{
|
||||
// This needs to be refactored and cleaned up (someday....) but here's a
|
||||
// buncha functors that do various string formatting stuff:
|
||||
auto format_visitor = [](const auto &v) {
|
||||
using T = std::decay_t<decltype(v)>;
|
||||
if constexpr (std::is_same_v<T, nvbench::float64_t>)
|
||||
@@ -312,110 +314,129 @@ void markdown_format::print_benchmark_results(const benchmark_vector &benchmarks
|
||||
return fmt::format("{:.2f}%", percentage);
|
||||
};
|
||||
|
||||
fmt::print("# Benchmark Summaries\n");
|
||||
// Start printing benchmarks
|
||||
fmt::print("# Benchmark Results\n");
|
||||
|
||||
for (const auto &bench_ptr : benchmarks)
|
||||
{
|
||||
const benchmark_base &bench = *bench_ptr;
|
||||
const axes_metadata &axes = bench.get_axes();
|
||||
const auto &bench = *bench_ptr;
|
||||
const auto &devices = bench.get_devices();
|
||||
const auto &axes = bench.get_axes();
|
||||
|
||||
fmt::print("\n## {}\n\n", bench.get_name());
|
||||
fmt::print("\n## {}\n", bench.get_name());
|
||||
|
||||
std::size_t row = 0;
|
||||
table_builder table;
|
||||
|
||||
for (const auto &inner_states : bench.get_states())
|
||||
// Do a single pass when no devices are specified. This happens for
|
||||
// benchmarks with `cpu` exec_tags.
|
||||
const std::size_t num_device_passes = devices.empty() ? 1 : devices.size();
|
||||
for (std::size_t device_pass = 0; device_pass < num_device_passes;
|
||||
++device_pass)
|
||||
{
|
||||
for (const nvbench::state &state : inner_states)
|
||||
std::optional<nvbench::device_info> device =
|
||||
devices.empty() ? std::nullopt
|
||||
: std::make_optional(devices[device_pass]);
|
||||
|
||||
if (device)
|
||||
{
|
||||
const auto &axis_values = state.get_axis_values();
|
||||
for (const auto &name : axis_values.get_names())
|
||||
{
|
||||
// Handle power-of-two int64 axes differently:
|
||||
if (axis_values.get_type(name) == named_values::type::int64 &&
|
||||
axes.get_int64_axis(name).is_power_of_two())
|
||||
{
|
||||
const nvbench::uint64_t value = axis_values.get_int64(name);
|
||||
const nvbench::uint64_t exponent = int64_axis::compute_log2(value);
|
||||
table.add_cell(row,
|
||||
name + "_axis_pretty",
|
||||
name,
|
||||
fmt::format("2^{}", exponent));
|
||||
table.add_cell(row,
|
||||
name + "_axis_descriptive",
|
||||
fmt::format("({})", name),
|
||||
fmt::to_string(value));
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string value = std::visit(format_visitor,
|
||||
axis_values.get_value(name));
|
||||
table.add_cell(row, name + "_axis", name, std::move(value));
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &summ : state.get_summaries())
|
||||
{
|
||||
if (summ.has_value("hide"))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
const std::string &key = summ.get_name();
|
||||
const std::string &header = summ.has_value("short_name")
|
||||
? summ.get_string("short_name")
|
||||
: key;
|
||||
|
||||
std::string hint = summ.has_value("hint") ? summ.get_string("hint")
|
||||
: std::string{};
|
||||
if (hint == "duration")
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
format_duration(summ.get_float64("value")));
|
||||
}
|
||||
else if (hint == "item_rate")
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
format_item_rate(summ.get_float64("value")));
|
||||
}
|
||||
else if (hint == "bytes")
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
format_bytes(summ.get_int64("value")));
|
||||
}
|
||||
else if (hint == "byte_rate")
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
format_byte_rate(summ.get_float64("value")));
|
||||
}
|
||||
else if (hint == "percentage")
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
format_percentage(summ.get_float64("value")));
|
||||
}
|
||||
else
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
std::visit(format_visitor, summ.get_value("value")));
|
||||
}
|
||||
}
|
||||
row++;
|
||||
fmt::print("\n### [{}] {}\n\n", device->get_id(), device->get_name());
|
||||
}
|
||||
}
|
||||
|
||||
fmt::print("{}", table.to_string());
|
||||
} // end foreach benchmark
|
||||
std::size_t row = 0;
|
||||
table_builder table;
|
||||
|
||||
for (const auto &cur_state : bench.get_states())
|
||||
{
|
||||
if (cur_state.get_device() == device)
|
||||
{
|
||||
const auto &axis_values = cur_state.get_axis_values();
|
||||
for (const auto &name : axis_values.get_names())
|
||||
{
|
||||
// Handle power-of-two int64 axes differently:
|
||||
if (axis_values.get_type(name) == named_values::type::int64 &&
|
||||
axes.get_int64_axis(name).is_power_of_two())
|
||||
{
|
||||
const nvbench::int64_t value = axis_values.get_int64(name);
|
||||
const nvbench::int64_t exponent = int64_axis::compute_log2(value);
|
||||
table.add_cell(row,
|
||||
name + "_axis_pretty",
|
||||
name,
|
||||
fmt::format("2^{}", exponent));
|
||||
table.add_cell(row,
|
||||
name + "_axis_descriptive",
|
||||
fmt::format("({})", name),
|
||||
fmt::to_string(value));
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string value = std::visit(format_visitor,
|
||||
axis_values.get_value(name));
|
||||
table.add_cell(row, name + "_axis", name, std::move(value));
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &summ : cur_state.get_summaries())
|
||||
{
|
||||
if (summ.has_value("hide"))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
const std::string &key = summ.get_name();
|
||||
const std::string &header = summ.has_value("short_name")
|
||||
? summ.get_string("short_name")
|
||||
: key;
|
||||
|
||||
std::string hint = summ.has_value("hint") ? summ.get_string("hint")
|
||||
: std::string{};
|
||||
if (hint == "duration")
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
format_duration(summ.get_float64("value")));
|
||||
}
|
||||
else if (hint == "item_rate")
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
format_item_rate(summ.get_float64("value")));
|
||||
}
|
||||
else if (hint == "bytes")
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
format_bytes(summ.get_int64("value")));
|
||||
}
|
||||
else if (hint == "byte_rate")
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
format_byte_rate(summ.get_float64("value")));
|
||||
}
|
||||
else if (hint == "percentage")
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
format_percentage(summ.get_float64("value")));
|
||||
}
|
||||
else
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
header,
|
||||
std::visit(format_visitor,
|
||||
summ.get_value("value")));
|
||||
}
|
||||
}
|
||||
row++;
|
||||
}
|
||||
}
|
||||
|
||||
fmt::print("{}", table.to_string());
|
||||
} // end foreach device_pass
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <stdexcept>
|
||||
#include <variant>
|
||||
|
||||
namespace nvbench
|
||||
@@ -16,9 +17,29 @@ namespace nvbench
|
||||
namespace detail
|
||||
{
|
||||
|
||||
void measure_cold_base::check()
|
||||
{
|
||||
const auto device = m_state.get_device();
|
||||
if (!device)
|
||||
{
|
||||
throw std::runtime_error(fmt::format("{}:{}: Device required for `cold` "
|
||||
"measurement.",
|
||||
__FILE__,
|
||||
__LINE__));
|
||||
}
|
||||
if (!device->is_active())
|
||||
{ // This means something went wrong higher up. Throw an error.
|
||||
throw std::runtime_error(fmt::format("{}:{}: Internal error: Current "
|
||||
"device is not active.",
|
||||
__FILE__,
|
||||
__LINE__));
|
||||
}
|
||||
}
|
||||
|
||||
void measure_cold_base::generate_summaries()
|
||||
{
|
||||
const auto avg_cuda_time = m_total_cuda_time / m_total_iters;
|
||||
const auto d_iters = static_cast<double>(m_total_iters);
|
||||
const auto avg_cuda_time = m_total_cuda_time / d_iters;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average GPU Time (Cold)");
|
||||
summ.set_string("hint", "duration");
|
||||
@@ -39,7 +60,7 @@ void measure_cold_base::generate_summaries()
|
||||
summ.set_float64("value", m_cuda_noise);
|
||||
}
|
||||
|
||||
const auto avg_cpu_time = m_total_cpu_time / m_total_iters;
|
||||
const auto avg_cpu_time = m_total_cpu_time / d_iters;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average CPU Time (Cold)");
|
||||
summ.set_string("hint", "duration");
|
||||
@@ -70,7 +91,7 @@ void measure_cold_base::generate_summaries()
|
||||
|
||||
// Log to stdout:
|
||||
fmt::memory_buffer param_buffer;
|
||||
fmt::format_to(param_buffer, "");
|
||||
fmt::format_to(param_buffer, "Device={}", m_state.get_device()->get_id());
|
||||
const axes_metadata &axes = m_state.get_benchmark().get_axes();
|
||||
const auto &axis_values = m_state.get_axis_values();
|
||||
for (const auto &name : axis_values.get_names())
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <nvbench/cpu_timer.cuh>
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/cuda_timer.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/launch.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
|
||||
@@ -33,6 +34,9 @@ struct measure_cold_base
|
||||
measure_cold_base &operator=(measure_cold_base &&) = delete;
|
||||
|
||||
protected:
|
||||
|
||||
void check();
|
||||
|
||||
void initialize()
|
||||
{
|
||||
m_total_cuda_time = 0.;
|
||||
@@ -54,15 +58,15 @@ protected:
|
||||
nvbench::cpu_timer m_cpu_timer;
|
||||
nvbench::detail::l2flush m_l2flush;
|
||||
|
||||
nvbench::int64_t m_min_iters{100};
|
||||
nvbench::int64_t m_min_iters{10};
|
||||
nvbench::int64_t m_total_iters{};
|
||||
|
||||
nvbench::float64_t m_max_noise{1.0}; // % rel stdev
|
||||
nvbench::float64_t m_max_noise{0.5}; // % rel stdev
|
||||
nvbench::float64_t m_cuda_noise{}; // % rel stdev
|
||||
nvbench::float64_t m_cpu_noise{}; // % rel stdev
|
||||
|
||||
nvbench::float64_t m_min_time{0.5};
|
||||
nvbench::float64_t m_max_time{1.0};
|
||||
nvbench::float64_t m_max_time{3.0};
|
||||
|
||||
nvbench::float64_t m_total_cuda_time{};
|
||||
nvbench::float64_t m_total_cpu_time{};
|
||||
@@ -83,6 +87,7 @@ struct measure_cold : public measure_cold_base
|
||||
|
||||
void operator()()
|
||||
{
|
||||
this->check();
|
||||
this->initialize();
|
||||
this->run_warmup();
|
||||
this->run_trials();
|
||||
|
||||
@@ -10,16 +10,31 @@
|
||||
#include <cstdio>
|
||||
#include <variant>
|
||||
|
||||
// TODO these can be removed once there's a device_manager or some such:
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
void measure_hot_base::check()
|
||||
{
|
||||
const auto device = m_state.get_device();
|
||||
if (!device)
|
||||
{
|
||||
throw std::runtime_error(fmt::format("{}:{}: Device required for `hot` "
|
||||
"measurement.",
|
||||
__FILE__,
|
||||
__LINE__));
|
||||
}
|
||||
if (!device->is_active())
|
||||
{ // This means something went wrong higher up. Throw an error.
|
||||
throw std::runtime_error(fmt::format("{}:{}: Internal error: Current "
|
||||
"device is not active.",
|
||||
__FILE__,
|
||||
__LINE__));
|
||||
}
|
||||
}
|
||||
|
||||
measure_hot_base::measure_hot_base(state &exec_state)
|
||||
: m_state(exec_state)
|
||||
{
|
||||
@@ -48,7 +63,8 @@ measure_hot_base::measure_hot_base(state &exec_state)
|
||||
|
||||
void measure_hot_base::generate_summaries()
|
||||
{
|
||||
const auto avg_cuda_time = m_total_cuda_time / m_total_iters;
|
||||
const auto d_iters = static_cast<double>(m_total_iters);
|
||||
const auto avg_cuda_time = m_total_cuda_time / d_iters;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average GPU Time (Hot)");
|
||||
summ.set_string("hint", "duration");
|
||||
@@ -59,7 +75,7 @@ void measure_hot_base::generate_summaries()
|
||||
summ.set_float64("value", avg_cuda_time);
|
||||
}
|
||||
|
||||
const auto avg_cpu_time = m_total_cpu_time / m_total_iters;
|
||||
const auto avg_cpu_time = m_total_cpu_time / d_iters;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average CPU Time (Hot)");
|
||||
summ.set_string("hide",
|
||||
@@ -86,13 +102,13 @@ void measure_hot_base::generate_summaries()
|
||||
summ.set_string("hint", "item_rate");
|
||||
summ.set_string("short_name", "Item Rate");
|
||||
summ.set_string("description", "Number of input items handled per second.");
|
||||
summ.set_float64("value", items / avg_cuda_time);
|
||||
summ.set_float64("value", static_cast<double>(items) / avg_cuda_time);
|
||||
}
|
||||
|
||||
if (const auto bytes = m_state.get_global_bytes_accessed_per_launch();
|
||||
bytes != 0)
|
||||
{
|
||||
const auto avg_used_gmem_bw = bytes / avg_cuda_time;
|
||||
const auto avg_used_gmem_bw = static_cast<double>(bytes) / avg_cuda_time;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average Global Memory Throughput");
|
||||
summ.set_string("hint", "byte_rate");
|
||||
@@ -103,16 +119,10 @@ void measure_hot_base::generate_summaries()
|
||||
summ.set_float64("value", avg_used_gmem_bw);
|
||||
}
|
||||
|
||||
// TODO cache this in a singleton somewhere.
|
||||
int dev_id{};
|
||||
cudaDeviceProp prop{};
|
||||
NVBENCH_CUDA_CALL(cudaGetDevice(&dev_id));
|
||||
NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&prop, dev_id));
|
||||
// clock rate in khz, width in bits. Result in bytes/sec.
|
||||
const auto peak_gmem_bw = 2 * 1000. * prop.memoryClockRate * // (sec^-1)
|
||||
prop.memoryBusWidth / CHAR_BIT; // bytes
|
||||
|
||||
{
|
||||
const auto peak_gmem_bw = static_cast<double>(
|
||||
m_state.get_device()->get_global_memory_bus_bandwidth());
|
||||
|
||||
auto &summ = m_state.add_summary("Percent Peak Global Memory Throughput");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("short_name", "PeakGMem");
|
||||
@@ -125,7 +135,7 @@ void measure_hot_base::generate_summaries()
|
||||
|
||||
// Log to stdout:
|
||||
fmt::memory_buffer param_buffer;
|
||||
fmt::format_to(param_buffer, "");
|
||||
fmt::format_to(param_buffer, "Device={}", m_state.get_device()->get_id());
|
||||
const axes_metadata &axes = m_state.get_benchmark().get_axes();
|
||||
const auto &axis_values = m_state.get_axis_values();
|
||||
for (const auto &name : axis_values.get_names())
|
||||
@@ -140,8 +150,8 @@ void measure_hot_base::generate_summaries()
|
||||
if (axis_values.get_type(name) == named_values::type::int64 &&
|
||||
axes.get_int64_axis(name).is_power_of_two())
|
||||
{
|
||||
const nvbench::uint64_t value = axis_values.get_int64(name);
|
||||
const nvbench::uint64_t exponent = int64_axis::compute_log2(value);
|
||||
const nvbench::int64_t value = axis_values.get_int64(name);
|
||||
const nvbench::int64_t exponent = int64_axis::compute_log2(value);
|
||||
fmt::format_to(param_buffer, "2^{}", exponent);
|
||||
}
|
||||
else
|
||||
|
||||
@@ -26,6 +26,9 @@ struct measure_hot_base
|
||||
measure_hot_base &operator=(measure_hot_base &&) = delete;
|
||||
|
||||
protected:
|
||||
|
||||
void check();
|
||||
|
||||
void initialize()
|
||||
{
|
||||
m_total_cpu_time = 0.;
|
||||
@@ -43,10 +46,10 @@ protected:
|
||||
nvbench::cpu_timer m_cpu_timer;
|
||||
|
||||
nvbench::int64_t m_total_iters{};
|
||||
nvbench::int64_t m_min_iters{100};
|
||||
nvbench::int64_t m_min_iters{10};
|
||||
|
||||
nvbench::float64_t m_min_time{0.5};
|
||||
nvbench::float64_t m_max_time{1.0};
|
||||
nvbench::float64_t m_max_time{3.0};
|
||||
|
||||
nvbench::float64_t m_total_cuda_time{};
|
||||
nvbench::float64_t m_total_cpu_time{};
|
||||
@@ -64,6 +67,7 @@ struct measure_hot : public measure_hot_base
|
||||
|
||||
void operator()()
|
||||
{
|
||||
this->check();
|
||||
this->initialize();
|
||||
this->run_warmup();
|
||||
this->run_trials();
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#include <nvbench/detail/state_generator.cuh>
|
||||
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/named_values.cuh>
|
||||
#include <nvbench/type_axis.cuh>
|
||||
|
||||
@@ -207,36 +208,53 @@ void state_generator::build_axis_configs()
|
||||
|
||||
void state_generator::build_states()
|
||||
{
|
||||
// Assemble states into a std::vector<std::vector<nvbench::state>>, where the
|
||||
// outer vector has one inner vector per type_config, and all configs in an
|
||||
// inner vector use the same type config. This should probably be wrapped up
|
||||
// into a nicer data structure, but organizing states in this way makes
|
||||
// matching up states to kernel_generator instantiations much easier during
|
||||
// dispatch.
|
||||
|
||||
m_states.clear();
|
||||
m_states.reserve(m_type_axis_configs.size());
|
||||
for (const auto &[type_config, axis_mask] : m_type_axis_configs)
|
||||
|
||||
const auto &devices = m_benchmark.get_devices();
|
||||
if (devices.empty())
|
||||
{
|
||||
auto &inner_states = m_states.emplace_back();
|
||||
this->add_states_for_device(std::nullopt);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto &device : devices)
|
||||
{
|
||||
this->add_states_for_device(device);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void state_generator::add_states_for_device(
|
||||
const std::optional<device_info> &device)
|
||||
{
|
||||
const auto num_type_configs = m_type_axis_configs.size();
|
||||
for (std::size_t type_config_index = 0; type_config_index < num_type_configs;
|
||||
++type_config_index)
|
||||
{
|
||||
const auto &[type_config,
|
||||
axis_mask] = m_type_axis_configs[type_config_index];
|
||||
|
||||
if (!axis_mask)
|
||||
{ // Don't generate inner vector if the type config is masked out.
|
||||
continue;
|
||||
}
|
||||
|
||||
inner_states.reserve(m_non_type_axis_configs.size());
|
||||
for (const auto &non_type_config : m_non_type_axis_configs)
|
||||
{
|
||||
// Concatenate the type + non_type configurations:
|
||||
nvbench::named_values config = type_config;
|
||||
config.append(non_type_config);
|
||||
inner_states.push_back(nvbench::state{m_benchmark, config});
|
||||
|
||||
// Create benchmark:
|
||||
m_states.push_back(nvbench::state{m_benchmark,
|
||||
std::move(config),
|
||||
device,
|
||||
type_config_index});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<nvbench::state>>
|
||||
state_generator::create(const benchmark_base &bench)
|
||||
std::vector<nvbench::state> state_generator::create(const benchmark_base &bench)
|
||||
{
|
||||
state_generator sg{bench};
|
||||
sg.build_axis_configs();
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <nvbench/axis_base.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@@ -11,25 +12,27 @@
|
||||
namespace nvbench
|
||||
{
|
||||
struct benchmark_base;
|
||||
struct device_info;
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
struct state_generator
|
||||
{
|
||||
static std::vector<std::vector<nvbench::state>>
|
||||
create(const benchmark_base &bench);
|
||||
static std::vector<nvbench::state> create(const benchmark_base &bench);
|
||||
|
||||
private:
|
||||
explicit state_generator(const benchmark_base &bench);
|
||||
|
||||
void build_axis_configs();
|
||||
void build_states();
|
||||
void add_states_for_device(const std::optional<nvbench::device_info> &device);
|
||||
|
||||
const benchmark_base &m_benchmark;
|
||||
// bool is a mask value; true if the config is used.
|
||||
std::vector<std::pair<nvbench::named_values, bool>> m_type_axis_configs;
|
||||
std::vector<nvbench::named_values> m_non_type_axis_configs;
|
||||
std::vector<std::vector<nvbench::state>> m_states;
|
||||
std::vector<nvbench::state> m_states;
|
||||
};
|
||||
|
||||
// Detail class; Generates a cartesian product of axis indices.
|
||||
@@ -73,6 +76,5 @@ struct state_iterator
|
||||
std::size_t m_total{};
|
||||
};
|
||||
|
||||
|
||||
} // namespace detail
|
||||
} // namespace nvbench
|
||||
|
||||
@@ -15,11 +15,17 @@ namespace nvbench
|
||||
namespace detail
|
||||
{
|
||||
int get_ptx_version(int);
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
struct device_info
|
||||
{
|
||||
explicit device_info(int id);
|
||||
explicit device_info(int device_id);
|
||||
|
||||
// Mainly used by unit tests:
|
||||
device_info(int device_id, cudaDeviceProp prop)
|
||||
: m_id{device_id}
|
||||
, m_prop{prop}
|
||||
{}
|
||||
|
||||
/// @return The device's id on the current system.
|
||||
[[nodiscard]] int get_id() const { return m_id; }
|
||||
@@ -30,6 +36,18 @@ struct device_info
|
||||
return std::string_view(m_prop.name);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool is_active() const
|
||||
{
|
||||
int id{-1};
|
||||
NVBENCH_CUDA_CALL(cudaGetDevice(&id));
|
||||
return id == m_id;
|
||||
}
|
||||
|
||||
void set_active() const
|
||||
{
|
||||
NVBENCH_CUDA_CALL(cudaSetDevice(m_id));
|
||||
}
|
||||
|
||||
/// @return The SM version of the current device as (major*100) + (minor*10).
|
||||
[[nodiscard]] int get_sm_version() const
|
||||
{
|
||||
@@ -145,6 +163,15 @@ struct device_info
|
||||
return m_prop;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool operator==(const device_info &o) const
|
||||
{
|
||||
return m_id == o.m_id;
|
||||
}
|
||||
[[nodiscard]] bool operator!=(const device_info &o) const
|
||||
{
|
||||
return m_id != o.m_id;
|
||||
}
|
||||
|
||||
private:
|
||||
int m_id;
|
||||
cudaDeviceProp m_prop;
|
||||
@@ -152,6 +179,8 @@ private:
|
||||
|
||||
// get_ptx_version implementation; this needs to stay in the header so it will
|
||||
// pick up the downstream project's compilation settings.
|
||||
// TODO this is fragile and will break when called from any library
|
||||
// translation unit.
|
||||
namespace detail
|
||||
{
|
||||
// Templated to work around ODR issues since __global__ functions cannot be marked
|
||||
|
||||
@@ -29,24 +29,51 @@ struct runner
|
||||
|
||||
void run()
|
||||
{
|
||||
auto states_iter = m_benchmark.m_states.begin();
|
||||
if (states_iter + num_type_configs != m_benchmark.m_states.end())
|
||||
if (m_benchmark.m_devices.empty())
|
||||
{
|
||||
throw std::runtime_error("State vector doesn't match type_configs.");
|
||||
this->run_device(std::nullopt);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto &device : m_benchmark.m_devices)
|
||||
{
|
||||
this->run_device(device);
|
||||
}
|
||||
}
|
||||
|
||||
nvbench::tl::foreach<type_configs>(
|
||||
[&states_iter](auto type_config_wrapper) {
|
||||
using type_config = typename decltype(type_config_wrapper)::type;
|
||||
for (nvbench::state &cur_state : *states_iter)
|
||||
{
|
||||
kernel_generator{}(cur_state, type_config{});
|
||||
}
|
||||
states_iter++;
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
/// Executes every state that belongs to `device`.
///
/// If `device` holds a value it is made active first. The type_configs list
/// is then walked in order; for each entry, kernel_generator is invoked on
/// every state whose device equals `device` and whose type-config index
/// equals the entry's position in the list.
///
/// @param device Device whose states should run; std::nullopt selects states
///               that have no device.
void run_device(const std::optional<nvbench::device_info> &device)
{
  if (device.has_value())
  {
    device->set_active();
  }

  // Position of the type_config currently being visited:
  std::size_t config_position = 0;
  nvbench::tl::foreach<type_configs>(
    [&states = m_benchmark.m_states, &config_position, &device](
      auto wrapper) {
      using type_config = typename decltype(wrapper)::type;

      for (nvbench::state &candidate : states)
      {
        // Skip states for other devices or other type_configs.
        const bool selected =
          candidate.get_device() == device &&
          candidate.get_type_config_index() == config_position;
        if (!selected)
        {
          continue;
        }
        kernel_generator{}(candidate, type_config{});
      }

      ++config_position;
    });
}
|
||||
|
||||
benchmark_type &m_benchmark;
|
||||
};
|
||||
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/named_values.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@@ -17,7 +19,7 @@ namespace detail
|
||||
{
|
||||
struct state_generator;
|
||||
struct state_tester;
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
/**
|
||||
* Stores all information about a particular benchmark configuration.
|
||||
@@ -41,6 +43,20 @@ struct state
|
||||
state &operator=(const state &) = delete;
|
||||
state &operator=(state &&) = default;
|
||||
|
||||
/// The CUDA device associated with with this benchmark state. May be
|
||||
/// nullopt for CPU-only benchmarks.
|
||||
[[nodiscard]] const std::optional<nvbench::device_info> &get_device() const
|
||||
{
|
||||
return m_device;
|
||||
}
|
||||
|
||||
/// An index into a benchmark::type_configs type_list. Returns 0 if no type
|
||||
/// axes in the associated benchmark.
|
||||
[[nodiscard]] std::size_t get_type_config_index() const
|
||||
{
|
||||
return m_type_config_index;
|
||||
}
|
||||
|
||||
[[nodiscard]] nvbench::int64_t get_int64(const std::string &axis_name) const;
|
||||
|
||||
[[nodiscard]] nvbench::float64_t
|
||||
@@ -99,13 +115,21 @@ private:
|
||||
: m_benchmark{bench}
|
||||
{}
|
||||
|
||||
state(const benchmark_base &bench, nvbench::named_values values)
|
||||
state(const benchmark_base &bench,
|
||||
nvbench::named_values values,
|
||||
std::optional<nvbench::device_info> device,
|
||||
std::size_t type_config_index)
|
||||
: m_benchmark{bench}
|
||||
, m_axis_values{std::move(values)}
|
||||
, m_device{std::move(device)}
|
||||
, m_type_config_index{type_config_index}
|
||||
{}
|
||||
|
||||
std::reference_wrapper<const nvbench::benchmark_base> m_benchmark;
|
||||
nvbench::named_values m_axis_values;
|
||||
std::optional<nvbench::device_info> m_device;
|
||||
std::size_t m_type_config_index{};
|
||||
|
||||
std::vector<nvbench::summary> m_summaries;
|
||||
std::string m_skip_reason;
|
||||
nvbench::int64_t m_items_processed_per_launch{};
|
||||
|
||||
@@ -83,18 +83,15 @@ std::string run_and_get_state_string(nvbench::benchmark_base &bench,
|
||||
std::size_t num_type_configs,
|
||||
std::size_t states_per_type_config)
|
||||
{
|
||||
bench.set_devices(std::vector<int>{});
|
||||
bench.run();
|
||||
fmt::memory_buffer buffer;
|
||||
const auto &states = bench.get_states();
|
||||
ASSERT(states.size() == num_type_configs);
|
||||
for (const auto &inner_states : states)
|
||||
ASSERT(states.size() == num_type_configs * states_per_type_config);
|
||||
for (const auto &state : states)
|
||||
{
|
||||
ASSERT(inner_states.size() == states_per_type_config);
|
||||
for (const auto &state : inner_states)
|
||||
{
|
||||
ASSERT(state.is_skipped());
|
||||
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
|
||||
}
|
||||
ASSERT(state.is_skipped());
|
||||
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
|
||||
}
|
||||
return fmt::to_string(buffer);
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ namespace
|
||||
{
|
||||
|
||||
[[nodiscard]] std::string
|
||||
states_to_string(const std::vector<std::vector<nvbench::state>> &states)
|
||||
states_to_string(const std::vector<nvbench::state> &states)
|
||||
{
|
||||
fmt::memory_buffer buffer;
|
||||
std::string table_format = "| {:^5} | {:^10} | {:^4} | {:^4} | {:^4} "
|
||||
@@ -50,24 +50,19 @@ states_to_string(const std::vector<std::vector<nvbench::state>> &states)
|
||||
"Floats",
|
||||
"Strings");
|
||||
|
||||
std::size_t type_config = 0;
|
||||
std::size_t config = 0;
|
||||
for (const auto &inner_states : states)
|
||||
std::size_t config = 0;
|
||||
for (const auto &state : states)
|
||||
{
|
||||
for (const nvbench::state &state : inner_states)
|
||||
{
|
||||
fmt::format_to(buffer,
|
||||
table_format,
|
||||
config++,
|
||||
type_config,
|
||||
state.get_string("T"),
|
||||
state.get_string("U"),
|
||||
state.get_int64("Ints"),
|
||||
state.get_int64("PO2s"),
|
||||
state.get_float64("Floats"),
|
||||
std::string{"\'"} + state.get_string("Strings") + "'");
|
||||
}
|
||||
type_config++;
|
||||
fmt::format_to(buffer,
|
||||
table_format,
|
||||
config++,
|
||||
state.get_type_config_index(),
|
||||
state.get_string("T"),
|
||||
state.get_string("U"),
|
||||
state.get_int64("Ints"),
|
||||
state.get_int64("PO2s"),
|
||||
state.get_float64("Floats"),
|
||||
std::string{"\'"} + state.get_string("Strings") + "'");
|
||||
}
|
||||
return fmt::to_string(buffer);
|
||||
}
|
||||
@@ -333,8 +328,7 @@ void test_int64_axis_pow2_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = 7 "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = 7 "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -451,8 +445,7 @@ void test_int64_axis_none_to_pow2_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = 7 "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = 7 "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -569,8 +562,7 @@ void test_int64_axis_pow2_to_none_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = 2 "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = 2 "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -584,8 +576,7 @@ void test_int64_axis_pow2_to_none_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -687,8 +678,7 @@ void test_float64_axis_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Floats [ ] = 3.5 "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = 3.5 "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -727,8 +717,7 @@ void test_float64_axis_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", "Floats=[3.5:3.6]"});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", "Floats=[3.5:3.6]"});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -893,8 +882,7 @@ void test_type_axis_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " T [ ] = U8 "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " T [ ] = U8 "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -908,8 +896,7 @@ void test_type_axis_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " T [ ] = [ U8 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " T [ ] = [ U8 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
|
||||
@@ -73,12 +73,10 @@ void test_empty()
|
||||
|
||||
runner.generate_states();
|
||||
ASSERT(bench.get_states().size() == 1);
|
||||
ASSERT(bench.get_states().front().size() == 1);
|
||||
ASSERT(bench.get_states().front().front().is_skipped() == false);
|
||||
ASSERT(bench.get_states().front().is_skipped() == false);
|
||||
runner.run();
|
||||
ASSERT(bench.get_states().size() == 1);
|
||||
ASSERT(bench.get_states().front().size() == 1);
|
||||
ASSERT(bench.get_states().front().front().is_skipped() == true);
|
||||
ASSERT(bench.get_states().front().is_skipped() == true);
|
||||
}
|
||||
|
||||
void test_non_types()
|
||||
@@ -94,18 +92,16 @@ void test_non_types()
|
||||
runner_type runner{bench};
|
||||
|
||||
runner.generate_states();
|
||||
ASSERT(bench.get_states().size() == 1);
|
||||
ASSERT(bench.get_states().front().size() == 27);
|
||||
for (const auto &state : bench.get_states().front())
|
||||
ASSERT(bench.get_states().size() == 27);
|
||||
for (const auto &state : bench.get_states())
|
||||
{
|
||||
ASSERT(state.is_skipped() == false);
|
||||
}
|
||||
|
||||
fmt::memory_buffer buffer;
|
||||
runner.run();
|
||||
ASSERT(bench.get_states().size() == 1);
|
||||
ASSERT(bench.get_states().front().size() == 27);
|
||||
for (const auto &state : bench.get_states().front())
|
||||
ASSERT(bench.get_states().size() == 27);
|
||||
for (const auto &state : bench.get_states())
|
||||
{
|
||||
ASSERT(state.is_skipped() == true);
|
||||
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
|
||||
@@ -150,32 +146,25 @@ void test_types()
|
||||
using runner_type = nvbench::runner<benchmark_type>;
|
||||
|
||||
benchmark_type bench;
|
||||
bench.set_devices(std::vector<int>{});
|
||||
bench.set_type_axes_names({"FloatT", "IntT", "MiscT"});
|
||||
|
||||
runner_type runner{bench};
|
||||
|
||||
runner.generate_states();
|
||||
ASSERT(bench.get_states().size() == 8);
|
||||
for (const auto &inner_states : bench.get_states())
|
||||
for (const auto &state : bench.get_states())
|
||||
{
|
||||
ASSERT(inner_states.size() == 1);
|
||||
for (const auto &state : inner_states)
|
||||
{
|
||||
ASSERT(state.is_skipped() == false);
|
||||
}
|
||||
ASSERT(state.is_skipped() == false);
|
||||
}
|
||||
|
||||
fmt::memory_buffer buffer;
|
||||
runner.run();
|
||||
ASSERT(bench.get_states().size() == 8);
|
||||
for (const auto &inner_states : bench.get_states())
|
||||
for (const auto &state : bench.get_states())
|
||||
{
|
||||
ASSERT(inner_states.size() == 1);
|
||||
for (const auto &state : inner_states)
|
||||
{
|
||||
ASSERT(state.is_skipped() == true);
|
||||
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
|
||||
}
|
||||
ASSERT(state.is_skipped() == true);
|
||||
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
|
||||
}
|
||||
|
||||
const std::string ref = R"expected(Params: FloatT: F32 IntT: I32 MiscT: bool
|
||||
@@ -198,6 +187,7 @@ void test_both()
|
||||
using runner_type = nvbench::runner<benchmark_type>;
|
||||
|
||||
benchmark_type bench;
|
||||
bench.set_devices(std::vector<int>{});
|
||||
bench.set_type_axes_names({"FloatT", "IntT", "MiscT"});
|
||||
bench.add_int64_axis("Int", {1, 2, 3});
|
||||
bench.add_float64_axis("Float", {11.0, 12.0, 13.0});
|
||||
@@ -206,27 +196,19 @@ void test_both()
|
||||
runner_type runner{bench};
|
||||
|
||||
runner.generate_states();
|
||||
ASSERT(bench.get_states().size() == 8);
|
||||
for (const auto &inner_states : bench.get_states())
|
||||
ASSERT(bench.get_states().size() == 8 * 27);
|
||||
for (const auto &state : bench.get_states())
|
||||
{
|
||||
ASSERT(inner_states.size() == 27);
|
||||
for (const auto &state : inner_states)
|
||||
{
|
||||
ASSERT(state.is_skipped() == false);
|
||||
}
|
||||
ASSERT(state.is_skipped() == false);
|
||||
}
|
||||
|
||||
fmt::memory_buffer buffer;
|
||||
runner.run();
|
||||
ASSERT(bench.get_states().size() == 8);
|
||||
for (const auto &inner_states : bench.get_states())
|
||||
ASSERT(bench.get_states().size() == 8 * 27);
|
||||
for (const auto &state : bench.get_states())
|
||||
{
|
||||
ASSERT(inner_states.size() == 27);
|
||||
for (const auto &state : inner_states)
|
||||
{
|
||||
ASSERT(state.is_skipped() == true);
|
||||
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
|
||||
}
|
||||
ASSERT(state.is_skipped() == true);
|
||||
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
|
||||
}
|
||||
|
||||
const std::string ref =
|
||||
|
||||
@@ -13,9 +13,11 @@ NVBENCH_DEFINE_CALLABLE(dummy_generator, dummy_callable);
|
||||
using dummy_bench = nvbench::benchmark<dummy_callable>;
|
||||
|
||||
// Subclass to gain access to protected members for testing:
|
||||
namespace nvbench::detail
|
||||
{
|
||||
struct state_tester : public nvbench::state
|
||||
{
|
||||
state_tester(const nvbench::benchmark_base& bench)
|
||||
state_tester(const nvbench::benchmark_base &bench)
|
||||
: nvbench::state{bench}
|
||||
{}
|
||||
|
||||
@@ -27,6 +29,9 @@ struct state_tester : public nvbench::state
|
||||
std::forward<T>(value)});
|
||||
}
|
||||
};
|
||||
} // namespace nvbench::detail
|
||||
|
||||
using nvbench::detail::state_tester;
|
||||
|
||||
void test_params()
|
||||
{
|
||||
@@ -50,7 +55,7 @@ void test_summaries()
|
||||
ASSERT(state.get_summaries().size() == 0);
|
||||
|
||||
{
|
||||
nvbench::summary& summary = state.add_summary("Test Summary1");
|
||||
nvbench::summary &summary = state.add_summary("Test Summary1");
|
||||
summary.set_float64("Float", 3.14);
|
||||
summary.set_int64("Int", 128);
|
||||
summary.set_string("String", "str");
|
||||
|
||||
@@ -130,6 +130,7 @@ void test_basic()
|
||||
void test_create()
|
||||
{
|
||||
dummy_bench bench;
|
||||
bench.set_devices(std::vector<int>{});
|
||||
bench.add_float64_axis("Radians", {3.14, 6.28});
|
||||
bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
|
||||
bench.add_int64_axis("NumInputs",
|
||||
@@ -137,22 +138,14 @@ void test_create()
|
||||
nvbench::int64_axis_flags::power_of_two);
|
||||
bench.add_string_axis("Strategy", {"Recursive", "Iterative"});
|
||||
|
||||
const std::vector<std::vector<nvbench::state>> states =
|
||||
const std::vector<nvbench::state> states =
|
||||
nvbench::detail::state_generator::create(bench);
|
||||
|
||||
// Outer vector has one entry per type_config. There are no type axes, so
|
||||
// there's only one type_config:
|
||||
ASSERT(states.size() == 1);
|
||||
|
||||
// Inner vectors have one entry per non-type config:
|
||||
// 2 (Radians) * 3 (VecSize) * 3 (NumInputs) * 2 (Strategy) = 36
|
||||
for (const auto &inner_states : states)
|
||||
{
|
||||
ASSERT(inner_states.size() == 36);
|
||||
}
|
||||
ASSERT(states.size() == 36);
|
||||
|
||||
fmt::memory_buffer buffer;
|
||||
std::string table_format =
|
||||
const std::string table_format =
|
||||
"| {:^5} | {:^10} | {:^7} | {:^7} | {:^9} | {:^9} |\n";
|
||||
|
||||
fmt::format_to(buffer, "\n");
|
||||
@@ -165,22 +158,17 @@ void test_create()
|
||||
"NumInputs",
|
||||
"Strategy");
|
||||
|
||||
std::size_t type_config = 0;
|
||||
std::size_t config = 0;
|
||||
for (const auto &inner_states : states)
|
||||
std::size_t config = 0;
|
||||
for (const auto &state : states)
|
||||
{
|
||||
for (const nvbench::state &state : inner_states)
|
||||
{
|
||||
fmt::format_to(buffer,
|
||||
table_format,
|
||||
config++,
|
||||
type_config,
|
||||
state.get_float64("Radians"),
|
||||
state.get_int64("VecSize"),
|
||||
state.get_int64("NumInputs"),
|
||||
state.get_string("Strategy"));
|
||||
}
|
||||
type_config++;
|
||||
fmt::format_to(buffer,
|
||||
table_format,
|
||||
config++,
|
||||
state.get_type_config_index(),
|
||||
state.get_float64("Radians"),
|
||||
state.get_int64("VecSize"),
|
||||
state.get_int64("NumInputs"),
|
||||
state.get_string("Strategy"));
|
||||
}
|
||||
|
||||
const std::string ref =
|
||||
@@ -231,6 +219,7 @@ void test_create()
|
||||
void test_create_with_types()
|
||||
{
|
||||
template_bench bench;
|
||||
bench.set_devices(std::vector<int>{});
|
||||
bench.set_type_axes_names({"Floats", "Ints", "Misc"});
|
||||
bench.add_float64_axis("Radians", {3.14, 6.28});
|
||||
bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
|
||||
@@ -239,19 +228,13 @@ void test_create_with_types()
|
||||
nvbench::int64_axis_flags::power_of_two);
|
||||
bench.add_string_axis("Strategy", {"Recursive", "Iterative"});
|
||||
|
||||
const std::vector<std::vector<nvbench::state>> states =
|
||||
const std::vector<nvbench::state> states =
|
||||
nvbench::detail::state_generator::create(bench);
|
||||
|
||||
// Outer vector has one entry per type_config
|
||||
// 2 (Floats) * 2 (Ints) * 2 (Misc) = 8 total type_configs
|
||||
ASSERT(states.size() == 8);
|
||||
|
||||
// Inner vectors have one entry per non-type config:
|
||||
// 2 (Radians) * 3 (VecSize) * 3 (NumInputs) * 2 (Strategy) = 36
|
||||
for (const auto &inner_states : states)
|
||||
{
|
||||
ASSERT(inner_states.size() == 36);
|
||||
}
|
||||
// - 2 (Floats) * 2 (Ints) * 2 (Misc) = 8 total type_configs
|
||||
// - 2 (Radians) * 3 (VecSize) * 3 (NumInputs) * 2 (Strategy) = 36 non_type
|
||||
// configs
|
||||
ASSERT(states.size() == 8 * 36);
|
||||
|
||||
fmt::memory_buffer buffer;
|
||||
std::string table_format = "| {:^5} | {:^10} | {:^6} | {:^4} | {:^4} | {:^7} "
|
||||
@@ -270,25 +253,20 @@ void test_create_with_types()
|
||||
"NumInputs",
|
||||
"Strategy");
|
||||
|
||||
std::size_t type_config = 0;
|
||||
std::size_t config = 0;
|
||||
for (const auto &inner_states : states)
|
||||
std::size_t config = 0;
|
||||
for (const auto &state : states)
|
||||
{
|
||||
for (const nvbench::state &state : inner_states)
|
||||
{
|
||||
fmt::format_to(buffer,
|
||||
table_format,
|
||||
config++,
|
||||
type_config,
|
||||
state.get_string("Floats"),
|
||||
state.get_string("Ints"),
|
||||
state.get_string("Misc"),
|
||||
state.get_float64("Radians"),
|
||||
state.get_int64("VecSize"),
|
||||
state.get_int64("NumInputs"),
|
||||
state.get_string("Strategy"));
|
||||
}
|
||||
type_config++;
|
||||
fmt::format_to(buffer,
|
||||
table_format,
|
||||
config++,
|
||||
state.get_type_config_index(),
|
||||
state.get_string("Floats"),
|
||||
state.get_string("Ints"),
|
||||
state.get_string("Misc"),
|
||||
state.get_float64("Radians"),
|
||||
state.get_int64("VecSize"),
|
||||
state.get_int64("NumInputs"),
|
||||
state.get_string("Strategy"));
|
||||
}
|
||||
|
||||
const std::string ref =
|
||||
@@ -591,6 +569,7 @@ void test_create_with_types()
|
||||
void test_create_with_masked_types()
|
||||
{
|
||||
template_bench bench;
|
||||
bench.set_devices(std::vector<int>{});
|
||||
bench.set_type_axes_names({"Floats", "Ints", "Misc"});
|
||||
bench.add_float64_axis("Radians", {3.14, 6.28});
|
||||
bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
|
||||
@@ -603,7 +582,7 @@ void test_create_with_masked_types()
|
||||
bench.get_axes().get_type_axis("Floats").set_active_inputs({"F32"});
|
||||
bench.get_axes().get_type_axis("Ints").set_active_inputs({"I64"});
|
||||
|
||||
const std::vector<std::vector<nvbench::state>> states =
|
||||
const std::vector<nvbench::state> states =
|
||||
nvbench::detail::state_generator::create(bench);
|
||||
|
||||
fmt::memory_buffer buffer;
|
||||
@@ -623,25 +602,20 @@ void test_create_with_masked_types()
|
||||
"NumInputs",
|
||||
"Strategy");
|
||||
|
||||
std::size_t type_config = 0;
|
||||
std::size_t config = 0;
|
||||
for (const auto &inner_states : states)
|
||||
std::size_t config = 0;
|
||||
for (const auto &state : states)
|
||||
{
|
||||
for (const nvbench::state &state : inner_states)
|
||||
{
|
||||
fmt::format_to(buffer,
|
||||
table_format,
|
||||
config++,
|
||||
type_config,
|
||||
state.get_string("Floats"),
|
||||
state.get_string("Ints"),
|
||||
state.get_string("Misc"),
|
||||
state.get_float64("Radians"),
|
||||
state.get_int64("VecSize"),
|
||||
state.get_int64("NumInputs"),
|
||||
state.get_string("Strategy"));
|
||||
}
|
||||
type_config++;
|
||||
fmt::format_to(buffer,
|
||||
table_format,
|
||||
config++,
|
||||
state.get_type_config_index(),
|
||||
state.get_string("Floats"),
|
||||
state.get_string("Ints"),
|
||||
state.get_string("Misc"),
|
||||
state.get_float64("Radians"),
|
||||
state.get_int64("VecSize"),
|
||||
state.get_int64("NumInputs"),
|
||||
state.get_string("Strategy"));
|
||||
}
|
||||
|
||||
const std::string ref =
|
||||
@@ -725,7 +699,69 @@ void test_create_with_masked_types()
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
|
||||
void test_devices()
|
||||
{
|
||||
const auto device_0 = nvbench::device_info{0, {}};
|
||||
const auto device_1 = nvbench::device_info{1, {}};
|
||||
const auto device_2 = nvbench::device_info{2, {}};
|
||||
|
||||
dummy_bench bench;
|
||||
bench.set_devices({device_0, device_1, device_2});
|
||||
bench.add_string_axis("S", {"foo", "bar"});
|
||||
bench.add_int64_axis("I", {2, 4});
|
||||
|
||||
const std::vector<nvbench::state> states =
|
||||
nvbench::detail::state_generator::create(bench);
|
||||
|
||||
// 3 devices * 4 axis configs = 12 total states
|
||||
ASSERT(states.size() == 12);
|
||||
|
||||
fmt::memory_buffer buffer;
|
||||
const std::string table_format =
|
||||
"| {:^5} | {:^6} | {:^5} | {:^3} |\n";
|
||||
|
||||
fmt::format_to(buffer, "\n");
|
||||
fmt::format_to(buffer,
|
||||
table_format,
|
||||
"State",
|
||||
"Device",
|
||||
"S",
|
||||
"I");
|
||||
|
||||
std::size_t config = 0;
|
||||
for (const auto &state : states)
|
||||
{
|
||||
fmt::format_to(buffer,
|
||||
table_format,
|
||||
config++,
|
||||
state.get_device()->get_id(),
|
||||
state.get_string("S"),
|
||||
state.get_int64("I"));
|
||||
}
|
||||
|
||||
const std::string ref =
|
||||
R"expected(
|
||||
| State | Device | S | I |
|
||||
| 0 | 0 | foo | 2 |
|
||||
| 1 | 0 | bar | 2 |
|
||||
| 2 | 0 | foo | 4 |
|
||||
| 3 | 0 | bar | 4 |
|
||||
| 4 | 1 | foo | 2 |
|
||||
| 5 | 1 | bar | 2 |
|
||||
| 6 | 1 | foo | 4 |
|
||||
| 7 | 1 | bar | 4 |
|
||||
| 8 | 2 | foo | 2 |
|
||||
| 9 | 2 | bar | 2 |
|
||||
| 10 | 2 | foo | 4 |
|
||||
| 11 | 2 | bar | 4 |
|
||||
)expected";
|
||||
|
||||
const std::string test = fmt::to_string(buffer);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
test_empty();
|
||||
test_single_state();
|
||||
@@ -733,4 +769,11 @@ int main()
|
||||
test_create();
|
||||
test_create_with_types();
|
||||
test_create_with_masked_types();
|
||||
test_devices();
|
||||
return 0;
|
||||
}
|
||||
catch (std::exception& e)
|
||||
{
|
||||
fmt::print("{}\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user