Execute benchmarks on all devices.

This commit is contained in:
Allison Vacanti
2021-02-12 20:02:20 -05:00
parent 5348f65e12
commit 92cc3b1189
17 changed files with 534 additions and 323 deletions

View File

@@ -1,8 +1,14 @@
#include <nvbench/benchmark_base.cuh>
#include <nvbench/device_manager.cuh>
namespace nvbench
{
// Default-construct a benchmark targeting every device known to the
// device_manager singleton; callers may narrow the set later via
// set_devices() / add_device().
benchmark_base::benchmark_base()
: m_devices(nvbench::device_manager::get().get_devices())
{}
benchmark_base::~benchmark_base() = default;
std::unique_ptr<benchmark_base> benchmark_base::clone() const
@@ -16,5 +22,20 @@ std::unique_ptr<benchmark_base> benchmark_base::clone() const
return std::move(result);
}
// Convenience overload: convert a list of raw CUDA device ids into
// device_info objects and forward to the device_info overload.
// Passing an empty vector clears the device list (used by CPU-only tests).
void benchmark_base::set_devices(std::vector<int> device_ids)
{
std::vector<device_info> devices;
// Reserve up front so the loop performs a single allocation.
devices.reserve(device_ids.size());
for (int dev_id : device_ids)
{
// device_info's explicit ctor queries the device's properties.
devices.emplace_back(dev_id);
}
this->set_devices(std::move(devices));
}
// Convenience overload: wrap a raw CUDA device id in a device_info and
// forward to the device_info overload, which appends it to m_devices.
void benchmark_base::add_device(int device_id)
{
this->add_device(device_info{device_id});
}
} // namespace nvbench

View File

@@ -1,6 +1,7 @@
#pragma once
#include <nvbench/axes_metadata.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/state.cuh>
#include <memory>
@@ -21,6 +22,7 @@ struct runner;
*/
struct benchmark_base
{
benchmark_base();
virtual ~benchmark_base();
/**
@@ -77,25 +79,37 @@ struct benchmark_base
return *this;
}
[[nodiscard]] nvbench::axes_metadata &get_axes()
void set_devices(std::vector<int> device_ids);
void set_devices(std::vector<nvbench::device_info> devices)
{
return m_axes;
m_devices = std::move(devices);
}
void add_device(int device_id);
void add_device(nvbench::device_info device)
{
m_devices.push_back(std::move(device));
}
[[nodiscard]] const std::vector<nvbench::device_info> &get_devices() const
{
return m_devices;
}
[[nodiscard]] nvbench::axes_metadata &get_axes() { return m_axes; }
[[nodiscard]] const nvbench::axes_metadata &get_axes() const
{
return m_axes;
}
[[nodiscard]] const std::vector<std::vector<nvbench::state>> &
get_states() const
{
return m_states;
}
[[nodiscard]] std::vector<std::vector<nvbench::state>> &get_states()
[[nodiscard]] const std::vector<nvbench::state> &get_states() const
{
return m_states;
}
[[nodiscard]] std::vector<nvbench::state> &get_states() { return m_states; }
void run() { this->do_run(); }
@@ -105,7 +119,8 @@ protected:
std::string m_name;
nvbench::axes_metadata m_axes;
std::vector<std::vector<nvbench::state>> m_states;
std::vector<nvbench::device_info> m_devices;
std::vector<nvbench::state> m_states;
private:
// route these through virtuals so the templated subclass can inject type info

View File

@@ -218,6 +218,8 @@ void markdown_format::print_benchmark_summaries(
void markdown_format::print_benchmark_results(const benchmark_vector &benchmarks)
{
// This needs to be refactored and cleaned up (someday....) but here's a
// buncha functors that do various string formatting stuff:
auto format_visitor = [](const auto &v) {
using T = std::decay_t<decltype(v)>;
if constexpr (std::is_same_v<T, nvbench::float64_t>)
@@ -312,110 +314,129 @@ void markdown_format::print_benchmark_results(const benchmark_vector &benchmarks
return fmt::format("{:.2f}%", percentage);
};
fmt::print("# Benchmark Summaries\n");
// Start printing benchmarks
fmt::print("# Benchmark Results\n");
for (const auto &bench_ptr : benchmarks)
{
const benchmark_base &bench = *bench_ptr;
const axes_metadata &axes = bench.get_axes();
const auto &bench = *bench_ptr;
const auto &devices = bench.get_devices();
const auto &axes = bench.get_axes();
fmt::print("\n## {}\n\n", bench.get_name());
fmt::print("\n## {}\n", bench.get_name());
std::size_t row = 0;
table_builder table;
for (const auto &inner_states : bench.get_states())
// Do a single pass when no devices are specified. This happens for
// benchmarks with `cpu` exec_tags.
const std::size_t num_device_passes = devices.empty() ? 1 : devices.size();
for (std::size_t device_pass = 0; device_pass < num_device_passes;
++device_pass)
{
for (const nvbench::state &state : inner_states)
std::optional<nvbench::device_info> device =
devices.empty() ? std::nullopt
: std::make_optional(devices[device_pass]);
if (device)
{
const auto &axis_values = state.get_axis_values();
for (const auto &name : axis_values.get_names())
{
// Handle power-of-two int64 axes differently:
if (axis_values.get_type(name) == named_values::type::int64 &&
axes.get_int64_axis(name).is_power_of_two())
{
const nvbench::uint64_t value = axis_values.get_int64(name);
const nvbench::uint64_t exponent = int64_axis::compute_log2(value);
table.add_cell(row,
name + "_axis_pretty",
name,
fmt::format("2^{}", exponent));
table.add_cell(row,
name + "_axis_descriptive",
fmt::format("({})", name),
fmt::to_string(value));
}
else
{
std::string value = std::visit(format_visitor,
axis_values.get_value(name));
table.add_cell(row, name + "_axis", name, std::move(value));
}
}
for (const auto &summ : state.get_summaries())
{
if (summ.has_value("hide"))
{
continue;
}
const std::string &key = summ.get_name();
const std::string &header = summ.has_value("short_name")
? summ.get_string("short_name")
: key;
std::string hint = summ.has_value("hint") ? summ.get_string("hint")
: std::string{};
if (hint == "duration")
{
table.add_cell(row,
key,
header,
format_duration(summ.get_float64("value")));
}
else if (hint == "item_rate")
{
table.add_cell(row,
key,
header,
format_item_rate(summ.get_float64("value")));
}
else if (hint == "bytes")
{
table.add_cell(row,
key,
header,
format_bytes(summ.get_int64("value")));
}
else if (hint == "byte_rate")
{
table.add_cell(row,
key,
header,
format_byte_rate(summ.get_float64("value")));
}
else if (hint == "percentage")
{
table.add_cell(row,
key,
header,
format_percentage(summ.get_float64("value")));
}
else
{
table.add_cell(row,
key,
header,
std::visit(format_visitor, summ.get_value("value")));
}
}
row++;
fmt::print("\n### [{}] {}\n\n", device->get_id(), device->get_name());
}
}
fmt::print("{}", table.to_string());
} // end foreach benchmark
std::size_t row = 0;
table_builder table;
for (const auto &cur_state : bench.get_states())
{
if (cur_state.get_device() == device)
{
const auto &axis_values = cur_state.get_axis_values();
for (const auto &name : axis_values.get_names())
{
// Handle power-of-two int64 axes differently:
if (axis_values.get_type(name) == named_values::type::int64 &&
axes.get_int64_axis(name).is_power_of_two())
{
const nvbench::int64_t value = axis_values.get_int64(name);
const nvbench::int64_t exponent = int64_axis::compute_log2(value);
table.add_cell(row,
name + "_axis_pretty",
name,
fmt::format("2^{}", exponent));
table.add_cell(row,
name + "_axis_descriptive",
fmt::format("({})", name),
fmt::to_string(value));
}
else
{
std::string value = std::visit(format_visitor,
axis_values.get_value(name));
table.add_cell(row, name + "_axis", name, std::move(value));
}
}
for (const auto &summ : cur_state.get_summaries())
{
if (summ.has_value("hide"))
{
continue;
}
const std::string &key = summ.get_name();
const std::string &header = summ.has_value("short_name")
? summ.get_string("short_name")
: key;
std::string hint = summ.has_value("hint") ? summ.get_string("hint")
: std::string{};
if (hint == "duration")
{
table.add_cell(row,
key,
header,
format_duration(summ.get_float64("value")));
}
else if (hint == "item_rate")
{
table.add_cell(row,
key,
header,
format_item_rate(summ.get_float64("value")));
}
else if (hint == "bytes")
{
table.add_cell(row,
key,
header,
format_bytes(summ.get_int64("value")));
}
else if (hint == "byte_rate")
{
table.add_cell(row,
key,
header,
format_byte_rate(summ.get_float64("value")));
}
else if (hint == "percentage")
{
table.add_cell(row,
key,
header,
format_percentage(summ.get_float64("value")));
}
else
{
table.add_cell(row,
key,
header,
std::visit(format_visitor,
summ.get_value("value")));
}
}
row++;
}
}
fmt::print("{}", table.to_string());
} // end foreach device_pass
}
}
} // namespace detail

View File

@@ -8,6 +8,7 @@
#include <algorithm>
#include <cstdio>
#include <stdexcept>
#include <variant>
namespace nvbench
@@ -16,9 +17,29 @@ namespace nvbench
namespace detail
{
// Validate preconditions before running a `cold` measurement:
// the state must be bound to a device, and that device must already be
// the active CUDA device (the runner is responsible for set_active()).
// Throws std::runtime_error on either violation.
void measure_cold_base::check()
{
const auto device = m_state.get_device();
if (!device)
{
// No device bound — `cold` timing requires a GPU.
throw std::runtime_error(fmt::format("{}:{}: Device required for `cold` "
"measurement.",
__FILE__,
__LINE__));
}
if (!device->is_active())
{ // This means something went wrong higher up. Throw an error.
throw std::runtime_error(fmt::format("{}:{}: Internal error: Current "
"device is not active.",
__FILE__,
__LINE__));
}
}
void measure_cold_base::generate_summaries()
{
const auto avg_cuda_time = m_total_cuda_time / m_total_iters;
const auto d_iters = static_cast<double>(m_total_iters);
const auto avg_cuda_time = m_total_cuda_time / d_iters;
{
auto &summ = m_state.add_summary("Average GPU Time (Cold)");
summ.set_string("hint", "duration");
@@ -39,7 +60,7 @@ void measure_cold_base::generate_summaries()
summ.set_float64("value", m_cuda_noise);
}
const auto avg_cpu_time = m_total_cpu_time / m_total_iters;
const auto avg_cpu_time = m_total_cpu_time / d_iters;
{
auto &summ = m_state.add_summary("Average CPU Time (Cold)");
summ.set_string("hint", "duration");
@@ -70,7 +91,7 @@ void measure_cold_base::generate_summaries()
// Log to stdout:
fmt::memory_buffer param_buffer;
fmt::format_to(param_buffer, "");
fmt::format_to(param_buffer, "Device={}", m_state.get_device()->get_id());
const axes_metadata &axes = m_state.get_benchmark().get_axes();
const auto &axis_values = m_state.get_axis_values();
for (const auto &name : axis_values.get_names())

View File

@@ -3,6 +3,7 @@
#include <nvbench/cpu_timer.cuh>
#include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_timer.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/launch.cuh>
#include <nvbench/state.cuh>
@@ -33,6 +34,9 @@ struct measure_cold_base
measure_cold_base &operator=(measure_cold_base &&) = delete;
protected:
void check();
void initialize()
{
m_total_cuda_time = 0.;
@@ -54,15 +58,15 @@ protected:
nvbench::cpu_timer m_cpu_timer;
nvbench::detail::l2flush m_l2flush;
nvbench::int64_t m_min_iters{100};
nvbench::int64_t m_min_iters{10};
nvbench::int64_t m_total_iters{};
nvbench::float64_t m_max_noise{1.0}; // % rel stdev
nvbench::float64_t m_max_noise{0.5}; // % rel stdev
nvbench::float64_t m_cuda_noise{}; // % rel stdev
nvbench::float64_t m_cpu_noise{}; // % rel stdev
nvbench::float64_t m_min_time{0.5};
nvbench::float64_t m_max_time{1.0};
nvbench::float64_t m_max_time{3.0};
nvbench::float64_t m_total_cuda_time{};
nvbench::float64_t m_total_cpu_time{};
@@ -83,6 +87,7 @@ struct measure_cold : public measure_cold_base
void operator()()
{
this->check();
this->initialize();
this->run_warmup();
this->run_trials();

View File

@@ -10,16 +10,31 @@
#include <cstdio>
#include <variant>
// TODO these can be removed once there's a device_manager or some such:
#include <cuda_runtime_api.h>
#include <nvbench/cuda_call.cuh>
namespace nvbench
{
namespace detail
{
// Validate preconditions before running a `hot` measurement:
// the state must be bound to a device, and that device must already be
// the active CUDA device (the runner is responsible for set_active()).
// Throws std::runtime_error on either violation. Mirrors
// measure_cold_base::check().
void measure_hot_base::check()
{
const auto device = m_state.get_device();
if (!device)
{
// No device bound — `hot` timing requires a GPU.
throw std::runtime_error(fmt::format("{}:{}: Device required for `hot` "
"measurement.",
__FILE__,
__LINE__));
}
if (!device->is_active())
{ // This means something went wrong higher up. Throw an error.
throw std::runtime_error(fmt::format("{}:{}: Internal error: Current "
"device is not active.",
__FILE__,
__LINE__));
}
}
measure_hot_base::measure_hot_base(state &exec_state)
: m_state(exec_state)
{
@@ -48,7 +63,8 @@ measure_hot_base::measure_hot_base(state &exec_state)
void measure_hot_base::generate_summaries()
{
const auto avg_cuda_time = m_total_cuda_time / m_total_iters;
const auto d_iters = static_cast<double>(m_total_iters);
const auto avg_cuda_time = m_total_cuda_time / d_iters;
{
auto &summ = m_state.add_summary("Average GPU Time (Hot)");
summ.set_string("hint", "duration");
@@ -59,7 +75,7 @@ void measure_hot_base::generate_summaries()
summ.set_float64("value", avg_cuda_time);
}
const auto avg_cpu_time = m_total_cpu_time / m_total_iters;
const auto avg_cpu_time = m_total_cpu_time / d_iters;
{
auto &summ = m_state.add_summary("Average CPU Time (Hot)");
summ.set_string("hide",
@@ -86,13 +102,13 @@ void measure_hot_base::generate_summaries()
summ.set_string("hint", "item_rate");
summ.set_string("short_name", "Item Rate");
summ.set_string("description", "Number of input items handled per second.");
summ.set_float64("value", items / avg_cuda_time);
summ.set_float64("value", static_cast<double>(items) / avg_cuda_time);
}
if (const auto bytes = m_state.get_global_bytes_accessed_per_launch();
bytes != 0)
{
const auto avg_used_gmem_bw = bytes / avg_cuda_time;
const auto avg_used_gmem_bw = static_cast<double>(bytes) / avg_cuda_time;
{
auto &summ = m_state.add_summary("Average Global Memory Throughput");
summ.set_string("hint", "byte_rate");
@@ -103,16 +119,10 @@ void measure_hot_base::generate_summaries()
summ.set_float64("value", avg_used_gmem_bw);
}
// TODO cache this in a singleton somewhere.
int dev_id{};
cudaDeviceProp prop{};
NVBENCH_CUDA_CALL(cudaGetDevice(&dev_id));
NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&prop, dev_id));
// clock rate in khz, width in bits. Result in bytes/sec.
const auto peak_gmem_bw = 2 * 1000. * prop.memoryClockRate * // (sec^-1)
prop.memoryBusWidth / CHAR_BIT; // bytes
{
const auto peak_gmem_bw = static_cast<double>(
m_state.get_device()->get_global_memory_bus_bandwidth());
auto &summ = m_state.add_summary("Percent Peak Global Memory Throughput");
summ.set_string("hint", "percentage");
summ.set_string("short_name", "PeakGMem");
@@ -125,7 +135,7 @@ void measure_hot_base::generate_summaries()
// Log to stdout:
fmt::memory_buffer param_buffer;
fmt::format_to(param_buffer, "");
fmt::format_to(param_buffer, "Device={}", m_state.get_device()->get_id());
const axes_metadata &axes = m_state.get_benchmark().get_axes();
const auto &axis_values = m_state.get_axis_values();
for (const auto &name : axis_values.get_names())
@@ -140,8 +150,8 @@ void measure_hot_base::generate_summaries()
if (axis_values.get_type(name) == named_values::type::int64 &&
axes.get_int64_axis(name).is_power_of_two())
{
const nvbench::uint64_t value = axis_values.get_int64(name);
const nvbench::uint64_t exponent = int64_axis::compute_log2(value);
const nvbench::int64_t value = axis_values.get_int64(name);
const nvbench::int64_t exponent = int64_axis::compute_log2(value);
fmt::format_to(param_buffer, "2^{}", exponent);
}
else

View File

@@ -26,6 +26,9 @@ struct measure_hot_base
measure_hot_base &operator=(measure_hot_base &&) = delete;
protected:
void check();
void initialize()
{
m_total_cpu_time = 0.;
@@ -43,10 +46,10 @@ protected:
nvbench::cpu_timer m_cpu_timer;
nvbench::int64_t m_total_iters{};
nvbench::int64_t m_min_iters{100};
nvbench::int64_t m_min_iters{10};
nvbench::float64_t m_min_time{0.5};
nvbench::float64_t m_max_time{1.0};
nvbench::float64_t m_max_time{3.0};
nvbench::float64_t m_total_cuda_time{};
nvbench::float64_t m_total_cpu_time{};
@@ -64,6 +67,7 @@ struct measure_hot : public measure_hot_base
void operator()()
{
this->check();
this->initialize();
this->run_warmup();
this->run_trials();

View File

@@ -1,6 +1,7 @@
#include <nvbench/detail/state_generator.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/named_values.cuh>
#include <nvbench/type_axis.cuh>
@@ -207,36 +208,53 @@ void state_generator::build_axis_configs()
void state_generator::build_states()
{
// Assemble states into a std::vector<std::vector<nvbench::state>>, where the
// outer vector has one inner vector per type_config, and all configs in an
// inner vector use the same type config. This should probably be wrapped up
// into a nicer data structure, but organizing states in this way makes
// matching up states to kernel_generator instantiations much easier during
// dispatch.
m_states.clear();
m_states.reserve(m_type_axis_configs.size());
for (const auto &[type_config, axis_mask] : m_type_axis_configs)
const auto &devices = m_benchmark.get_devices();
if (devices.empty())
{
auto &inner_states = m_states.emplace_back();
this->add_states_for_device(std::nullopt);
}
else
{
for (const auto &device : devices)
{
this->add_states_for_device(device);
}
}
}
void state_generator::add_states_for_device(
const std::optional<device_info> &device)
{
const auto num_type_configs = m_type_axis_configs.size();
for (std::size_t type_config_index = 0; type_config_index < num_type_configs;
++type_config_index)
{
const auto &[type_config,
axis_mask] = m_type_axis_configs[type_config_index];
if (!axis_mask)
{ // Don't generate inner vector if the type config is masked out.
continue;
}
inner_states.reserve(m_non_type_axis_configs.size());
for (const auto &non_type_config : m_non_type_axis_configs)
{
// Concatenate the type + non_type configurations:
nvbench::named_values config = type_config;
config.append(non_type_config);
inner_states.push_back(nvbench::state{m_benchmark, config});
// Create benchmark:
m_states.push_back(nvbench::state{m_benchmark,
std::move(config),
device,
type_config_index});
}
}
}
std::vector<std::vector<nvbench::state>>
state_generator::create(const benchmark_base &bench)
std::vector<nvbench::state> state_generator::create(const benchmark_base &bench)
{
state_generator sg{bench};
sg.build_axis_configs();

View File

@@ -4,6 +4,7 @@
#include <nvbench/axis_base.cuh>
#include <nvbench/state.cuh>
#include <optional>
#include <string>
#include <utility>
#include <vector>
@@ -11,25 +12,27 @@
namespace nvbench
{
struct benchmark_base;
struct device_info;
namespace detail
{
struct state_generator
{
static std::vector<std::vector<nvbench::state>>
create(const benchmark_base &bench);
static std::vector<nvbench::state> create(const benchmark_base &bench);
private:
explicit state_generator(const benchmark_base &bench);
void build_axis_configs();
void build_states();
void add_states_for_device(const std::optional<nvbench::device_info> &device);
const benchmark_base &m_benchmark;
// bool is a mask value; true if the config is used.
std::vector<std::pair<nvbench::named_values, bool>> m_type_axis_configs;
std::vector<nvbench::named_values> m_non_type_axis_configs;
std::vector<std::vector<nvbench::state>> m_states;
std::vector<nvbench::state> m_states;
};
// Detail class; Generates a cartesian product of axis indices.
@@ -73,6 +76,5 @@ struct state_iterator
std::size_t m_total{};
};
} // namespace detail
} // namespace nvbench

View File

@@ -15,11 +15,17 @@ namespace nvbench
namespace detail
{
int get_ptx_version(int);
}
} // namespace detail
struct device_info
{
explicit device_info(int id);
explicit device_info(int device_id);
// Mainly used by unit tests:
device_info(int device_id, cudaDeviceProp prop)
: m_id{device_id}
, m_prop{prop}
{}
/// @return The device's id on the current system.
[[nodiscard]] int get_id() const { return m_id; }
@@ -30,6 +36,18 @@ struct device_info
return std::string_view(m_prop.name);
}
/// @return True if this device is the calling thread's current CUDA
/// device (i.e. cudaGetDevice reports this device's id).
[[nodiscard]] bool is_active() const
{
int id{-1};
NVBENCH_CUDA_CALL(cudaGetDevice(&id));
return id == m_id;
}
/// Make this device the calling thread's current CUDA device
/// (cudaSetDevice); subsequent kernel launches target it.
void set_active() const
{
NVBENCH_CUDA_CALL(cudaSetDevice(m_id));
}
/// @return The SM version of the current device as (major*100) + (minor*10).
[[nodiscard]] int get_sm_version() const
{
@@ -145,6 +163,15 @@ struct device_info
return m_prop;
}
/// Equality compares device ids only; cached cudaDeviceProp is ignored.
[[nodiscard]] bool operator==(const device_info &o) const
{
return m_id == o.m_id;
}
/// Inequality compares device ids only, consistent with operator==.
[[nodiscard]] bool operator!=(const device_info &o) const
{
return m_id != o.m_id;
}
private:
int m_id;
cudaDeviceProp m_prop;
@@ -152,6 +179,8 @@ private:
// get_ptx_version implementation; this needs to stay in the header so it will
// pick up the downstream project's compilation settings.
// TODO this is fragile and will break when called from any library
// translation unit.
namespace detail
{
// Templated to workaround ODR issues since __global__functions cannot be marked

View File

@@ -29,24 +29,51 @@ struct runner
void run()
{
auto states_iter = m_benchmark.m_states.begin();
if (states_iter + num_type_configs != m_benchmark.m_states.end())
if (m_benchmark.m_devices.empty())
{
throw std::runtime_error("State vector doesn't match type_configs.");
this->run_device(std::nullopt);
}
else
{
for (const auto &device : m_benchmark.m_devices)
{
this->run_device(device);
}
}
nvbench::tl::foreach<type_configs>(
[&states_iter](auto type_config_wrapper) {
using type_config = typename decltype(type_config_wrapper)::type;
for (nvbench::state &cur_state : *states_iter)
{
kernel_generator{}(cur_state, type_config{});
}
states_iter++;
});
}
private:
// Execute all of the benchmark's states that belong to `device`.
// `device` is nullopt for CPU-only benchmarks; otherwise the device is
// made active before any kernels launch. States are matched by both
// device and type_config_index so each kernel_generator instantiation
// only runs the states generated for its type_config.
void run_device(const std::optional<nvbench::device_info> &device)
{
if (device)
{
// Bind this device to the calling thread for all launches below.
device->set_active();
}
// Iterate through type_configs:
std::size_t type_config_index = 0;
nvbench::tl::foreach<type_configs>([&states = m_benchmark.m_states,
&type_config_index,
&device](auto type_config_wrapper) {
// Get current type_config:
using type_config = typename decltype(type_config_wrapper)::type;
// Find states with the current device / type_config
for (nvbench::state &cur_state : states)
{
if (cur_state.get_device() == device &&
cur_state.get_type_config_index() == type_config_index)
{
kernel_generator{}(cur_state, type_config{});
}
}
++type_config_index;
});
}
benchmark_type &m_benchmark;
};

View File

@@ -1,10 +1,12 @@
#pragma once
#include <nvbench/device_info.cuh>
#include <nvbench/named_values.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/types.cuh>
#include <functional>
#include <optional>
#include <string>
#include <vector>
@@ -17,7 +19,7 @@ namespace detail
{
struct state_generator;
struct state_tester;
}
} // namespace detail
/**
* Stores all information about a particular benchmark configuration.
@@ -41,6 +43,20 @@ struct state
state &operator=(const state &) = delete;
state &operator=(state &&) = default;
/// The CUDA device associated with this benchmark state. May be
/// nullopt for CPU-only benchmarks.
[[nodiscard]] const std::optional<nvbench::device_info> &get_device() const
{
return m_device;
}
/// An index into a benchmark::type_configs type_list. Returns 0 if no type
/// axes in the associated benchmark.
[[nodiscard]] std::size_t get_type_config_index() const
{
return m_type_config_index;
}
[[nodiscard]] nvbench::int64_t get_int64(const std::string &axis_name) const;
[[nodiscard]] nvbench::float64_t
@@ -99,13 +115,21 @@ private:
: m_benchmark{bench}
{}
state(const benchmark_base &bench, nvbench::named_values values)
state(const benchmark_base &bench,
nvbench::named_values values,
std::optional<nvbench::device_info> device,
std::size_t type_config_index)
: m_benchmark{bench}
, m_axis_values{std::move(values)}
, m_device{std::move(device)}
, m_type_config_index{type_config_index}
{}
std::reference_wrapper<const nvbench::benchmark_base> m_benchmark;
nvbench::named_values m_axis_values;
std::optional<nvbench::device_info> m_device;
std::size_t m_type_config_index{};
std::vector<nvbench::summary> m_summaries;
std::string m_skip_reason;
nvbench::int64_t m_items_processed_per_launch{};

View File

@@ -83,18 +83,15 @@ std::string run_and_get_state_string(nvbench::benchmark_base &bench,
std::size_t num_type_configs,
std::size_t states_per_type_config)
{
bench.set_devices(std::vector<int>{});
bench.run();
fmt::memory_buffer buffer;
const auto &states = bench.get_states();
ASSERT(states.size() == num_type_configs);
for (const auto &inner_states : states)
ASSERT(states.size() == num_type_configs * states_per_type_config);
for (const auto &state : states)
{
ASSERT(inner_states.size() == states_per_type_config);
for (const auto &state : inner_states)
{
ASSERT(state.is_skipped());
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
}
ASSERT(state.is_skipped());
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
}
return fmt::to_string(buffer);
}

View File

@@ -32,7 +32,7 @@ namespace
{
[[nodiscard]] std::string
states_to_string(const std::vector<std::vector<nvbench::state>> &states)
states_to_string(const std::vector<nvbench::state> &states)
{
fmt::memory_buffer buffer;
std::string table_format = "| {:^5} | {:^10} | {:^4} | {:^4} | {:^4} "
@@ -50,24 +50,19 @@ states_to_string(const std::vector<std::vector<nvbench::state>> &states)
"Floats",
"Strings");
std::size_t type_config = 0;
std::size_t config = 0;
for (const auto &inner_states : states)
std::size_t config = 0;
for (const auto &state : states)
{
for (const nvbench::state &state : inner_states)
{
fmt::format_to(buffer,
table_format,
config++,
type_config,
state.get_string("T"),
state.get_string("U"),
state.get_int64("Ints"),
state.get_int64("PO2s"),
state.get_float64("Floats"),
std::string{"\'"} + state.get_string("Strings") + "'");
}
type_config++;
fmt::format_to(buffer,
table_format,
config++,
state.get_type_config_index(),
state.get_string("T"),
state.get_string("U"),
state.get_int64("Ints"),
state.get_int64("PO2s"),
state.get_float64("Floats"),
std::string{"\'"} + state.get_string("Strings") + "'");
}
return fmt::to_string(buffer);
}
@@ -333,8 +328,7 @@ void test_int64_axis_pow2_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = 7 "});
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = 7 "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -451,8 +445,7 @@ void test_int64_axis_none_to_pow2_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = 7 "});
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = 7 "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -569,8 +562,7 @@ void test_int64_axis_pow2_to_none_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = 2 "});
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = 2 "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -584,8 +576,7 @@ void test_int64_axis_pow2_to_none_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -687,8 +678,7 @@ void test_float64_axis_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Floats [ ] = 3.5 "});
parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = 3.5 "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -727,8 +717,7 @@ void test_float64_axis_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", "Floats=[3.5:3.6]"});
parser.parse({"--benchmark", "TestBench", "--axis", "Floats=[3.5:3.6]"});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -893,8 +882,7 @@ void test_type_axis_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " T [ ] = U8 "});
parser.parse({"--benchmark", "TestBench", "--axis", " T [ ] = U8 "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -908,8 +896,7 @@ void test_type_axis_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " T [ ] = [ U8 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " T [ ] = [ U8 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}

View File

@@ -73,12 +73,10 @@ void test_empty()
runner.generate_states();
ASSERT(bench.get_states().size() == 1);
ASSERT(bench.get_states().front().size() == 1);
ASSERT(bench.get_states().front().front().is_skipped() == false);
ASSERT(bench.get_states().front().is_skipped() == false);
runner.run();
ASSERT(bench.get_states().size() == 1);
ASSERT(bench.get_states().front().size() == 1);
ASSERT(bench.get_states().front().front().is_skipped() == true);
ASSERT(bench.get_states().front().is_skipped() == true);
}
void test_non_types()
@@ -94,18 +92,16 @@ void test_non_types()
runner_type runner{bench};
runner.generate_states();
ASSERT(bench.get_states().size() == 1);
ASSERT(bench.get_states().front().size() == 27);
for (const auto &state : bench.get_states().front())
ASSERT(bench.get_states().size() == 27);
for (const auto &state : bench.get_states())
{
ASSERT(state.is_skipped() == false);
}
fmt::memory_buffer buffer;
runner.run();
ASSERT(bench.get_states().size() == 1);
ASSERT(bench.get_states().front().size() == 27);
for (const auto &state : bench.get_states().front())
ASSERT(bench.get_states().size() == 27);
for (const auto &state : bench.get_states())
{
ASSERT(state.is_skipped() == true);
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
@@ -150,32 +146,25 @@ void test_types()
using runner_type = nvbench::runner<benchmark_type>;
benchmark_type bench;
bench.set_devices(std::vector<int>{});
bench.set_type_axes_names({"FloatT", "IntT", "MiscT"});
runner_type runner{bench};
runner.generate_states();
ASSERT(bench.get_states().size() == 8);
for (const auto &inner_states : bench.get_states())
for (const auto &state : bench.get_states())
{
ASSERT(inner_states.size() == 1);
for (const auto &state : inner_states)
{
ASSERT(state.is_skipped() == false);
}
ASSERT(state.is_skipped() == false);
}
fmt::memory_buffer buffer;
runner.run();
ASSERT(bench.get_states().size() == 8);
for (const auto &inner_states : bench.get_states())
for (const auto &state : bench.get_states())
{
ASSERT(inner_states.size() == 1);
for (const auto &state : inner_states)
{
ASSERT(state.is_skipped() == true);
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
}
ASSERT(state.is_skipped() == true);
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
}
const std::string ref = R"expected(Params: FloatT: F32 IntT: I32 MiscT: bool
@@ -198,6 +187,7 @@ void test_both()
using runner_type = nvbench::runner<benchmark_type>;
benchmark_type bench;
bench.set_devices(std::vector<int>{});
bench.set_type_axes_names({"FloatT", "IntT", "MiscT"});
bench.add_int64_axis("Int", {1, 2, 3});
bench.add_float64_axis("Float", {11.0, 12.0, 13.0});
@@ -206,27 +196,19 @@ void test_both()
runner_type runner{bench};
runner.generate_states();
ASSERT(bench.get_states().size() == 8);
for (const auto &inner_states : bench.get_states())
ASSERT(bench.get_states().size() == 8 * 27);
for (const auto &state : bench.get_states())
{
ASSERT(inner_states.size() == 27);
for (const auto &state : inner_states)
{
ASSERT(state.is_skipped() == false);
}
ASSERT(state.is_skipped() == false);
}
fmt::memory_buffer buffer;
runner.run();
ASSERT(bench.get_states().size() == 8);
for (const auto &inner_states : bench.get_states())
ASSERT(bench.get_states().size() == 8 * 27);
for (const auto &state : bench.get_states())
{
ASSERT(inner_states.size() == 27);
for (const auto &state : inner_states)
{
ASSERT(state.is_skipped() == true);
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
}
ASSERT(state.is_skipped() == true);
fmt::format_to(buffer, "{}\n", state.get_skip_reason());
}
const std::string ref =

View File

@@ -13,9 +13,11 @@ NVBENCH_DEFINE_CALLABLE(dummy_generator, dummy_callable);
using dummy_bench = nvbench::benchmark<dummy_callable>;
// Subclass to gain access to protected members for testing:
namespace nvbench::detail
{
struct state_tester : public nvbench::state
{
state_tester(const nvbench::benchmark_base& bench)
state_tester(const nvbench::benchmark_base &bench)
: nvbench::state{bench}
{}
@@ -27,6 +29,9 @@ struct state_tester : public nvbench::state
std::forward<T>(value)});
}
};
} // namespace nvbench::detail
using nvbench::detail::state_tester;
void test_params()
{
@@ -50,7 +55,7 @@ void test_summaries()
ASSERT(state.get_summaries().size() == 0);
{
nvbench::summary& summary = state.add_summary("Test Summary1");
nvbench::summary &summary = state.add_summary("Test Summary1");
summary.set_float64("Float", 3.14);
summary.set_int64("Int", 128);
summary.set_string("String", "str");

View File

@@ -130,6 +130,7 @@ void test_basic()
void test_create()
{
dummy_bench bench;
bench.set_devices(std::vector<int>{});
bench.add_float64_axis("Radians", {3.14, 6.28});
bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
bench.add_int64_axis("NumInputs",
@@ -137,22 +138,14 @@ void test_create()
nvbench::int64_axis_flags::power_of_two);
bench.add_string_axis("Strategy", {"Recursive", "Iterative"});
const std::vector<std::vector<nvbench::state>> states =
const std::vector<nvbench::state> states =
nvbench::detail::state_generator::create(bench);
// Outer vector has one entry per type_config. There are no type axes, so
// there's only one type_config:
ASSERT(states.size() == 1);
// Inner vectors have one entry per non-type config:
// 2 (Radians) * 3 (VecSize) * 3 (NumInputs) * 2 (Strategy) = 36
for (const auto &inner_states : states)
{
ASSERT(inner_states.size() == 36);
}
ASSERT(states.size() == 36);
fmt::memory_buffer buffer;
std::string table_format =
const std::string table_format =
"| {:^5} | {:^10} | {:^7} | {:^7} | {:^9} | {:^9} |\n";
fmt::format_to(buffer, "\n");
@@ -165,22 +158,17 @@ void test_create()
"NumInputs",
"Strategy");
std::size_t type_config = 0;
std::size_t config = 0;
for (const auto &inner_states : states)
std::size_t config = 0;
for (const auto &state : states)
{
for (const nvbench::state &state : inner_states)
{
fmt::format_to(buffer,
table_format,
config++,
type_config,
state.get_float64("Radians"),
state.get_int64("VecSize"),
state.get_int64("NumInputs"),
state.get_string("Strategy"));
}
type_config++;
fmt::format_to(buffer,
table_format,
config++,
state.get_type_config_index(),
state.get_float64("Radians"),
state.get_int64("VecSize"),
state.get_int64("NumInputs"),
state.get_string("Strategy"));
}
const std::string ref =
@@ -231,6 +219,7 @@ void test_create()
void test_create_with_types()
{
template_bench bench;
bench.set_devices(std::vector<int>{});
bench.set_type_axes_names({"Floats", "Ints", "Misc"});
bench.add_float64_axis("Radians", {3.14, 6.28});
bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
@@ -239,19 +228,13 @@ void test_create_with_types()
nvbench::int64_axis_flags::power_of_two);
bench.add_string_axis("Strategy", {"Recursive", "Iterative"});
const std::vector<std::vector<nvbench::state>> states =
const std::vector<nvbench::state> states =
nvbench::detail::state_generator::create(bench);
// Outer vector has one entry per type_config
// 2 (Floats) * 2 (Ints) * 2 (Misc) = 8 total type_configs
ASSERT(states.size() == 8);
// Inner vectors have one entry per non-type config:
// 2 (Radians) * 3 (VecSize) * 3 (NumInputs) * 2 (Strategy) = 36
for (const auto &inner_states : states)
{
ASSERT(inner_states.size() == 36);
}
// - 2 (Floats) * 2 (Ints) * 2 (Misc) = 8 total type_configs
// - 2 (Radians) * 3 (VecSize) * 3 (NumInputs) * 2 (Strategy) = 36 non_type
// configs
ASSERT(states.size() == 8 * 36);
fmt::memory_buffer buffer;
std::string table_format = "| {:^5} | {:^10} | {:^6} | {:^4} | {:^4} | {:^7} "
@@ -270,25 +253,20 @@ void test_create_with_types()
"NumInputs",
"Strategy");
std::size_t type_config = 0;
std::size_t config = 0;
for (const auto &inner_states : states)
std::size_t config = 0;
for (const auto &state : states)
{
for (const nvbench::state &state : inner_states)
{
fmt::format_to(buffer,
table_format,
config++,
type_config,
state.get_string("Floats"),
state.get_string("Ints"),
state.get_string("Misc"),
state.get_float64("Radians"),
state.get_int64("VecSize"),
state.get_int64("NumInputs"),
state.get_string("Strategy"));
}
type_config++;
fmt::format_to(buffer,
table_format,
config++,
state.get_type_config_index(),
state.get_string("Floats"),
state.get_string("Ints"),
state.get_string("Misc"),
state.get_float64("Radians"),
state.get_int64("VecSize"),
state.get_int64("NumInputs"),
state.get_string("Strategy"));
}
const std::string ref =
@@ -591,6 +569,7 @@ void test_create_with_types()
void test_create_with_masked_types()
{
template_bench bench;
bench.set_devices(std::vector<int>{});
bench.set_type_axes_names({"Floats", "Ints", "Misc"});
bench.add_float64_axis("Radians", {3.14, 6.28});
bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
@@ -603,7 +582,7 @@ void test_create_with_masked_types()
bench.get_axes().get_type_axis("Floats").set_active_inputs({"F32"});
bench.get_axes().get_type_axis("Ints").set_active_inputs({"I64"});
const std::vector<std::vector<nvbench::state>> states =
const std::vector<nvbench::state> states =
nvbench::detail::state_generator::create(bench);
fmt::memory_buffer buffer;
@@ -623,25 +602,20 @@ void test_create_with_masked_types()
"NumInputs",
"Strategy");
std::size_t type_config = 0;
std::size_t config = 0;
for (const auto &inner_states : states)
std::size_t config = 0;
for (const auto &state : states)
{
for (const nvbench::state &state : inner_states)
{
fmt::format_to(buffer,
table_format,
config++,
type_config,
state.get_string("Floats"),
state.get_string("Ints"),
state.get_string("Misc"),
state.get_float64("Radians"),
state.get_int64("VecSize"),
state.get_int64("NumInputs"),
state.get_string("Strategy"));
}
type_config++;
fmt::format_to(buffer,
table_format,
config++,
state.get_type_config_index(),
state.get_string("Floats"),
state.get_string("Ints"),
state.get_string("Misc"),
state.get_float64("Radians"),
state.get_int64("VecSize"),
state.get_int64("NumInputs"),
state.get_string("Strategy"));
}
const std::string ref =
@@ -725,7 +699,69 @@ void test_create_with_masked_types()
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
// Checks that state_generator::create emits one state per combination of
// (device, axis configuration), and that the device index varies slowest:
// all states for device 0 come first, then device 1, then device 2 (this
// ordering is pinned by the golden table below).
void test_devices()
{
// Stub devices with ids 0/1/2. NOTE(review): the second constructor argument
// is an empty aggregate -- presumably default device properties; confirm
// against nvbench::device_info's constructor.
const auto device_0 = nvbench::device_info{0, {}};
const auto device_1 = nvbench::device_info{1, {}};
const auto device_2 = nvbench::device_info{2, {}};
dummy_bench bench;
bench.set_devices({device_0, device_1, device_2});
// Two axes with two values each -> 4 axis configs per device.
bench.add_string_axis("S", {"foo", "bar"});
bench.add_int64_axis("I", {2, 4});
const std::vector<nvbench::state> states =
nvbench::detail::state_generator::create(bench);
// 3 devices * 4 axis configs = 12 total states
ASSERT(states.size() == 12);
// Render every generated state as one table row (state index, device id,
// then the two axis values) and diff the result against the golden string.
fmt::memory_buffer buffer;
const std::string table_format =
"| {:^5} | {:^6} | {:^5} | {:^3} |\n";
fmt::format_to(buffer, "\n");
fmt::format_to(buffer,
table_format,
"State",
"Device",
"S",
"I");
std::size_t config = 0;
for (const auto &state : states)
{
// Each state carries a handle to the device it will run on; get_device()
// is dereferenced directly here, so it must be non-null for these states.
fmt::format_to(buffer,
table_format,
config++,
state.get_device()->get_id(),
state.get_string("S"),
state.get_int64("I"));
}
const std::string ref =
R"expected(
| State | Device | S | I |
| 0 | 0 | foo | 2 |
| 1 | 0 | bar | 2 |
| 2 | 0 | foo | 4 |
| 3 | 0 | bar | 4 |
| 4 | 1 | foo | 2 |
| 5 | 1 | bar | 2 |
| 6 | 1 | foo | 4 |
| 7 | 1 | bar | 4 |
| 8 | 2 | foo | 2 |
| 9 | 2 | bar | 2 |
| 10 | 2 | foo | 4 |
| 11 | 2 | bar | 4 |
)expected";
const std::string test = fmt::to_string(buffer);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
int main()
try
{
test_empty();
test_single_state();
@@ -733,4 +769,11 @@ int main()
test_create();
test_create_with_types();
test_create_with_masked_types();
test_devices();
return 0;
}
catch (std::exception& e)
{
fmt::print("{}\n", e.what());
return 1;
}