Polishing up the proof of concept.

- Enable throughput stats
- Add cold measurements
- Print liveness/log messages while running trials.
- Improve markdown formatting
- Add `nvbench::range` helper for generating numeric axis value ranges
This commit is contained in:
Allison Vacanti
2021-01-02 01:45:12 -05:00
parent d93462ca09
commit 0f3f1ef899
15 changed files with 524 additions and 70 deletions

View File

@@ -12,6 +12,7 @@ set(srcs
type_axis.cu
detail/markdown_format.cu
detail/measure_cold.cu
detail/measure_hot.cu
detail/state_generator.cu
)

View File

@@ -0,0 +1,49 @@
#pragma once
#include <nvbench/cuda_call.cuh>
#include <cuda_runtime_api.h>
namespace nvbench
{
namespace detail
{
// Owns a device scratch buffer used to overwrite the GPU's L2 cache between
// timed kernel launches, so "cold" measurements aren't skewed by data left
// in cache by a previous launch.
struct l2flush
{
  // Query the current device's L2 size and allocate a matching device
  // buffer. On devices reporting no L2 (m_l2_size == 0) nothing is
  // allocated and flush() becomes a no-op.
  l2flush()
  {
    int dev_id{};
    NVBENCH_CUDA_CALL(cudaGetDevice(&dev_id));
    NVBENCH_CUDA_CALL(
      cudaDeviceGetAttribute(&m_l2_size, cudaDevAttrL2CacheSize, dev_id));
    if (m_l2_size > 0)
    {
      NVBENCH_CUDA_CALL(cudaMalloc(&m_l2_buffer, m_l2_size));
    }
  }

  ~l2flush()
  {
    if (m_l2_buffer)
    {
      NVBENCH_CUDA_CALL(cudaFree(m_l2_buffer));
    }
  }

  // This type owns a raw device allocation; a copy or move would lead to a
  // double cudaFree in the destructor, so forbid both (rule of five).
  l2flush(const l2flush &) = delete;
  l2flush(l2flush &&) = delete;
  l2flush &operator=(const l2flush &) = delete;
  l2flush &operator=(l2flush &&) = delete;

  // Asynchronously memset the L2-sized buffer on `stream`, evicting
  // previously cached lines.
  void flush(cudaStream_t stream)
  {
    if (m_l2_size > 0)
    {
      NVBENCH_CUDA_CALL(cudaMemsetAsync(m_l2_buffer, 0, m_l2_size, stream));
    }
  }

private:
  // Default-initialize both members so the object is never in an
  // indeterminate state, even if construction is interrupted.
  int m_l2_size{};           // L2 cache size in bytes; 0 if none reported.
  int *m_l2_buffer{nullptr}; // Device buffer of m_l2_size bytes (or null).
};
} // namespace detail
} // namespace nvbench

View File

@@ -124,7 +124,7 @@ void markdown_format::print()
using T = std::decay_t<decltype(v)>;
if constexpr (std::is_same_v<T, nvbench::float64_t>)
{
return fmt::format("{:7.5g}", v);
return fmt::format("{:.5g}", v);
}
else if constexpr (std::is_same_v<T, std::string>)
{
@@ -136,22 +136,50 @@ void markdown_format::print()
auto format_duration = [](nvbench::float64_t seconds) {
if (seconds >= 1.) // 1+ sec
{
return fmt::format("{:5.2f} s", seconds);
return fmt::format("{:.2f} s", seconds);
}
else if (seconds >= 1e-1) // 100+ ms.
else if (seconds >= 1e-2) // 10+ ms.
{
return fmt::format("{:5.2f} ms", seconds * 1e3);
return fmt::format("{:.2f} ms", seconds * 1e3);
}
else if (seconds >= 1e-4) // 100+ us.
else if (seconds >= 1e-5) // 10+ us.
{
return fmt::format("{:5.2f} us", seconds * 1e6);
return fmt::format("{:.2f} us", seconds * 1e6);
}
else
{
return fmt::format("{:5.2f} ns", seconds * 1e9);
return fmt::format("{:.2f} ns", seconds * 1e9);
}
};
auto format_item_rate = [](nvbench::float64_t items_per_second) {
return fmt::format("{:.3g}/sec", items_per_second);
};
auto format_byte_rate = [](nvbench::float64_t bytes_per_second) {
if (bytes_per_second >= 10. * 1024. * 1024. * 1024.) // 10 GiB/s
{
return fmt::format("{:.2f} GiB/s",
bytes_per_second / (1024. * 1024. * 1024.));
}
else if (bytes_per_second >= 10. * 1024. * 1024.) // 10 MiB/s
{
return fmt::format("{:.2f} MiB/s", bytes_per_second / (1024. * 1024.));
}
else if (bytes_per_second >= 10. * 1024.) // 10 KiB/s.
{
return fmt::format("{:.2f} KiB/s", bytes_per_second / 1024.);
}
else
{
return fmt::format("{:.2f} B/s", bytes_per_second);
}
};
auto format_percentage = [](nvbench::float64_t percentage) {
return fmt::format("{:.2f}%", percentage);
};
auto &mgr = nvbench::benchmark_manager::get();
for (const auto &bench_ptr : mgr.get_benchmarks())
{
@@ -178,12 +206,32 @@ void markdown_format::print()
? summ.get_string("short_name")
: summ.get_name();
if (summ.has_value("hint") && summ.get_string("hint") == "duration")
std::string hint = summ.has_value("hint") ? summ.get_string("hint")
: std::string{};
if (hint == "duration")
{
table.add_cell(row,
name,
format_duration(summ.get_float64("value")));
}
else if (hint == "item_rate")
{
table.add_cell(row,
name,
format_item_rate(summ.get_float64("value")));
}
else if (hint == "byte_rate")
{
table.add_cell(row,
name,
format_byte_rate(summ.get_float64("value")));
}
else if (hint == "percentage")
{
table.add_cell(row,
name,
format_percentage(summ.get_float64("value")));
}
else
{
table.add_cell(row,

View File

@@ -0,0 +1,81 @@
#include <nvbench/detail/measure_cold.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
#include <fmt/format.h>
#include <cstdio>
#include <variant>
namespace nvbench
{
namespace detail
{
// Reset all accumulators so a fresh measurement run starts from zero.
void measure_cold_base::initialize()
{
  m_num_trials = 0;
  m_cpu_time   = 0.;
  m_cuda_time  = 0.;
}
// Record the cold-measurement results as summaries on the state and print a
// one-line progress/log message to stdout.
//
// Precondition: at least one trial has run (m_num_trials > 0), otherwise the
// averages below divide by zero.
void measure_cold_base::generate_summaries()
{
  {
    auto &summ = m_state.add_summary("Number of Trials (Cold)");
    summ.set_string("short_name", "Cold Trials");
    summ.set_string("description",
                    "Number of kernel executions in cold time measurements.");
    summ.set_int64("value", m_num_trials);
  }

  const auto avg_cuda_time = m_cuda_time / m_num_trials;
  {
    auto &summ = m_state.add_summary("Average GPU Time (Cold)");
    // "duration" hint tells the formatter to render this as a time value.
    summ.set_string("hint", "duration");
    summ.set_string("short_name", "Cold GPU");
    summ.set_string("description",
                    "Average isolated kernel execution time as measured "
                    "by CUDA events.");
    summ.set_float64("value", avg_cuda_time);
  }

  const auto avg_cpu_time = m_cpu_time / m_num_trials;
  {
    auto &summ = m_state.add_summary("Average CPU Time (Cold)");
    summ.set_string("hint", "duration");
    summ.set_string("short_name", "Cold CPU");
    summ.set_string("description",
                    "Average isolated kernel execution time observed "
                    "from host.");
    summ.set_float64("value", avg_cpu_time);
  }

  // Log to stdout. Build a "name=value " list of this config's axis values.
  // (Removed a no-op `fmt::format_to(param_buffer, "")` that appended an
  // empty string.)
  fmt::memory_buffer param_buffer;
  const auto &axis_values = m_state.get_axis_values();
  for (const auto &name : axis_values.get_names())
  {
    fmt::format_to(param_buffer, "{}=", name);
    // Axis values are variants; visit to format the active alternative.
    std::visit([&param_buffer](
                 const auto &val) { fmt::format_to(param_buffer, "{} ", val); },
               axis_values.get_value(name));
  }

  fmt::print("Benchmark {} Params: [ {}] Cold {:.6f} ms GPU, {:.6f} ms CPU, "
             "{}x\n",
             m_state.get_benchmark().get_name(),
             fmt::to_string(param_buffer),
             avg_cuda_time * 1e3,
             avg_cpu_time * 1e3,
             m_num_trials);
  // Flush so progress is visible while later benchmarks are still running.
  std::fflush(stdout);
}
} // namespace detail
} // namespace nvbench

View File

@@ -0,0 +1,108 @@
#pragma once
#include <nvbench/cpu_timer.cuh>
#include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_timer.cuh>
#include <nvbench/launch.cuh>
#include <nvbench/state.cuh>
#include <nvbench/detail/l2flush.cuh>
#include <cuda_runtime.h>
#include <utility>
namespace nvbench
{
namespace detail
{
// Non-templated portion of the cold-measurement driver: holds the timers,
// L2-flush helper, and accumulated results. The kernel-specific trial loop
// lives in the measure_cold<KernelLauncher> subclass below.
struct measure_cold_base
{
  explicit measure_cold_base(nvbench::state &exec_state)
      : m_state(exec_state)
  {}

  // Non-copyable/non-movable: holds a reference to the benchmark state.
  measure_cold_base(const measure_cold_base &) = delete;
  measure_cold_base(measure_cold_base &&) = delete;
  measure_cold_base &operator=(const measure_cold_base &) = delete;
  measure_cold_base &operator=(measure_cold_base &&) = delete;

protected:
  // Reset accumulators before a measurement run.
  void initialize();
  // Publish results as summaries on m_state and log progress to stdout.
  void generate_summaries();

  nvbench::launch m_launch{};
  nvbench::cuda_timer m_cuda_timer{};
  nvbench::cpu_timer m_cpu_timer{};
  nvbench::detail::l2flush m_l2flush{};

  // seconds:
  nvbench::float64_t m_min_time{1.}; // Sample until a clock accumulates this.
  nvbench::float64_t m_cuda_time{};  // Sum of per-trial GPU durations.
  nvbench::float64_t m_cpu_time{};   // Sum of per-trial CPU durations.
  nvbench::int64_t m_num_trials{};

  nvbench::state &m_state;
};
// Cold measurement: each trial launches the kernel in isolation on a
// synchronized stream with a freshly flushed L2 cache, timing the single
// launch with CUDA events (GPU time) and a host-side timer (CPU time).
template <typename KernelLauncher>
struct measure_cold : public measure_cold_base
{
  measure_cold(nvbench::state &state, KernelLauncher &kernel_launcher)
      : measure_cold_base(state)
      , m_kernel_launcher{kernel_launcher}
  {}

  // Full measurement pipeline: reset, warm up, sample, report.
  void operator()()
  {
    this->initialize();
    this->run_warmup();
    this->run_trials();
    this->generate_summaries();
  }

private:
  // One untimed launch before sampling begins — presumably to absorb
  // one-time launch costs so they don't pollute the first timed trial
  // (TODO confirm intent).
  void run_warmup()
  {
    m_l2flush.flush(m_launch.get_stream());
    this->launch_kernel();
    NVBENCH_CUDA_CALL(cudaStreamSynchronize(m_launch.get_stream()));
  }

  // Repeat isolated, timed launches until the accumulated GPU or CPU time
  // reaches m_min_time. Statement order here is load-bearing:
  // - flush + sync *before* starting timers so each launch starts from an
  //   idle stream and an evicted L2;
  // - the CUDA event timer brackets only the kernel on the stream, while
  //   the CPU timer stops after the post-launch sync, so it also includes
  //   launch and synchronization overhead.
  // NOTE(review): std::max needs <algorithm>; currently relies on a
  // transitive include — confirm.
  void run_trials()
  {
    do
    {
      m_l2flush.flush(m_launch.get_stream());
      NVBENCH_CUDA_CALL(cudaStreamSynchronize(m_launch.get_stream()));
      m_cuda_timer.start(m_launch.get_stream());
      m_cpu_timer.start();
      this->launch_kernel();
      m_cuda_timer.stop(m_launch.get_stream());
      NVBENCH_CUDA_CALL(cudaStreamSynchronize(m_launch.get_stream()));
      m_cpu_timer.stop();
      // TODO eventually these should also get logged in a vector for
      // statistical analysis.
      m_cuda_time += m_cuda_timer.get_duration();
      m_cpu_time += m_cpu_timer.get_duration();
      ++m_num_trials;
    } while (std::max(m_cuda_time, m_cpu_time) < m_min_time);
  }

  // TODO forceinline
  void launch_kernel() { m_kernel_launcher(m_launch); }

  KernelLauncher &m_kernel_launcher;
};
} // namespace detail
} // namespace nvbench

View File

@@ -1,8 +1,18 @@
#include <nvbench/detail/measure_hot.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
#include <fmt/format.h>
#include <cstdio>
#include <variant>
// note that these can be removed once there's a device_manager or some such:
#include <cuda_runtime_api.h>
#include <nvbench/cuda_call.cuh>
namespace nvbench
{
@@ -12,32 +22,98 @@ namespace detail
void measure_hot_base::generate_summaries()
{
{
auto &summary = m_state.add_summary("Number of Trials (Hot)");
summary.set_string("short_name", "Hot Trials");
summary.set_string("description",
"Number of kernel executions in hot time measurements.");
summary.set_int64("value", m_num_trials);
auto &summ = m_state.add_summary("Number of Trials (Hot)");
summ.set_string("short_name", "Hot Trials");
summ.set_string("description",
"Number of kernel executions in hot time measurements.");
summ.set_int64("value", m_num_trials);
}
const auto avg_cuda_time = m_cuda_time / m_num_trials;
{
auto &summary = m_state.add_summary("Average GPU Time (Hot)");
summary.set_string("hint", "duration");
summary.set_string("short_name", "Hot GPU");
summary.set_string("description",
"Average back-to-back kernel execution time as measured "
"by CUDA events.");
summary.set_float64("value", m_cuda_time / m_num_trials);
auto &summ = m_state.add_summary("Average GPU Time (Hot)");
summ.set_string("hint", "duration");
summ.set_string("short_name", "Hot GPU");
summ.set_string("description",
"Average back-to-back kernel execution time as measured "
"by CUDA events.");
summ.set_float64("value", avg_cuda_time);
}
const auto avg_cpu_time = m_cpu_time / m_num_trials;
{
auto &summary = m_state.add_summary("Average CPU Time (Hot)");
summary.set_string("hint", "duration");
summary.set_string("short_name", "Hot CPU");
summary.set_string("description",
"Average back-to-back kernel execution time observed "
"from host.");
summary.set_float64("value", m_cpu_time / m_num_trials);
auto &summ = m_state.add_summary("Average CPU Time (Hot)");
summ.set_string("hint", "duration");
summ.set_string("short_name", "Hot CPU");
summ.set_string("description",
"Average back-to-back kernel execution time observed "
"from host.");
summ.set_float64("value", avg_cpu_time);
}
if (const auto items = m_state.get_items_processed_per_launch(); items != 0)
{
auto &summ = m_state.add_summary("Item Throughput");
summ.set_string("hint", "item_rate");
summ.set_string("short_name", "Item Rate");
summ.set_string("description", "Number of input items handled per second.");
summ.set_float64("value", items / avg_cuda_time);
}
if (const auto bytes = m_state.get_global_bytes_accessed_per_launch();
bytes != 0)
{
const auto avg_used_gmem_bw = bytes / avg_cuda_time;
{
auto &summ = m_state.add_summary("Average Global Memory Throughput");
summ.set_string("hint", "byte_rate");
summ.set_string("short_name", "GlobalMemUse");
summ.set_string("description",
"Number of bytes read/written per second to the CUDA "
"device's global memory.");
summ.set_float64("value", avg_used_gmem_bw);
}
// TODO cache this in a singleton somewhere.
int dev_id{};
cudaDeviceProp prop{};
NVBENCH_CUDA_CALL(cudaGetDevice(&dev_id));
NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&prop, dev_id));
// clock rate in khz, width in bits. Result in bytes/sec.
const auto peak_gmem_bw = 2 * 1000. * prop.memoryClockRate * // (sec^-1)
prop.memoryBusWidth / CHAR_BIT; // bytes
{
auto &summ = m_state.add_summary("Percent Peak Global Memory Throughput");
summ.set_string("hint", "percentage");
summ.set_string("short_name", "PeakGMem");
summ.set_string("description",
"Global device memory throughput as a percentage of the "
"device's peak bandwidth.");
summ.set_float64("value", avg_used_gmem_bw / peak_gmem_bw * 100.);
}
}
// Log to stdout:
fmt::memory_buffer param_buffer;
fmt::format_to(param_buffer, "");
const auto &axis_values = m_state.get_axis_values();
for (const auto &name : axis_values.get_names())
{
fmt::format_to(param_buffer, "{}=", name);
std::visit([&param_buffer](
const auto &val) { fmt::format_to(param_buffer, "{} ", val); },
axis_values.get_value(name));
}
fmt::print("Benchmark {} Params: [ {}] Hot {:.6f} ms GPU, {:.6f} ms CPU, "
"{}x\n",
m_state.get_benchmark().get_name(),
fmt::to_string(param_buffer),
avg_cuda_time * 1e3,
avg_cpu_time * 1e3,
m_num_trials);
std::fflush(stdout);
}
} // namespace detail

View File

@@ -1,5 +1,6 @@
#include <nvbench/detail/state_generator.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/named_values.cuh>
#include <nvbench/type_axis.cuh>
@@ -18,7 +19,7 @@ namespace detail
{
std::vector<std::vector<nvbench::state>>
state_generator::create(const axes_metadata &axes)
state_generator::create(const benchmark_base &bench)
{
// Assemble states into a std::vector<std::vector<nvbench::state>>, where the
// outer vector has one inner vector per type_config, and all configs in an
@@ -27,6 +28,7 @@ state_generator::create(const axes_metadata &axes)
// matching up states to kernel_generator instantiations much easier during
// dispatch.
const axes_metadata& axes = bench.get_axes();
// vector of all axes:
const std::vector<std::unique_ptr<axis_base>> &axes_vec = axes.get_axes();
@@ -99,7 +101,7 @@ state_generator::create(const axes_metadata &axes)
for (non_type_sg.init(); non_type_sg.iter_valid(); non_type_sg.next())
{
// Initialize each state with the current type_config:
nvbench::state state{type_config};
nvbench::state state{bench, type_config};
// Add non-type parameters to state:
for (const axis_index &axis_info : non_type_sg.get_current_indices())
{

View File

@@ -9,7 +9,7 @@
namespace nvbench
{
struct benchmark_base;
namespace detail
{
@@ -17,7 +17,7 @@ struct state_generator
{
static std::vector<std::vector<nvbench::state>>
create(const axes_metadata &axes);
create(const benchmark_base &bench);
protected:
struct axis_index

View File

@@ -1,5 +1,6 @@
#pragma once
#include <nvbench/detail/measure_cold.cuh>
#include <nvbench/detail/measure_hot.cuh>
namespace nvbench
@@ -9,8 +10,16 @@ template <typename KernelLauncher>
void exec(nvbench::state &exec_state, KernelLauncher &&kernel_launcher)
{
using KL = std::remove_reference_t<KernelLauncher>;
nvbench::detail::measure_hot<KL> hot{exec_state, kernel_launcher};
hot();
{
nvbench::detail::measure_cold<KL> cold{exec_state, kernel_launcher};
cold();
}
{
nvbench::detail::measure_hot<KL> hot{exec_state, kernel_launcher};
hot();
}
}
} // namespace nvbench

View File

@@ -12,6 +12,7 @@
#include <nvbench/exec.cuh>
#include <nvbench/launch.cuh>
#include <nvbench/main.cuh>
#include <nvbench/range.cuh>
#include <nvbench/state.cuh>
#include <nvbench/type_list.cuh>
#include <nvbench/types.cuh>

32
nvbench/range.cuh Normal file
View File

@@ -0,0 +1,32 @@
#pragma once
#include <nvbench/types.cuh>
#include <type_traits>
#include <vector>
namespace nvbench
{
namespace detail
{

// Element type produced by nvbench::range: floating-point inputs yield
// nvbench::float64_t, all other (integral) inputs yield nvbench::int64_t.
template <typename T>
using range_output_t = std::conditional_t<std::is_floating_point_v<T>,
                                          nvbench::float64_t,
                                          nvbench::int64_t>;

} // namespace detail
// Generate the values {start, start + stride, start + 2*stride, ...} up to
// and including `end` (when it lies on the stride grid).
//
// Floating-point inputs produce nvbench::float64_t values; integral inputs
// produce nvbench::int64_t values (see detail::range_output_t).
//
// Each value is computed as start + i * stride rather than by repeated
// addition, so floating-point ranges don't accumulate rounding error.
// A non-positive stride yields an empty result instead of looping forever.
template <typename T>
auto range(T start, T end, T stride = T{1})
{
  using output_t = detail::range_output_t<T>;
  using result_t = std::vector<output_t>;

  result_t result;
  if (stride <= T{0})
  { // Repeatedly adding a non-positive stride would never reach `end`.
    return result;
  }

  for (long long i = 0;; ++i)
  {
    const T value = static_cast<T>(start + static_cast<T>(i) * stride);
    if (value > end)
    {
      break;
    }
    result.push_back(static_cast<output_t>(value));
  }
  return result;
}
} // namespace nvbench

View File

@@ -24,7 +24,7 @@ struct runner
void generate_states()
{
m_benchmark.m_states =
nvbench::detail::state_generator::create(m_benchmark.m_axes);
nvbench::detail::state_generator::create(m_benchmark);
}
void run()

View File

@@ -10,6 +10,8 @@
namespace nvbench
{
struct benchmark_base;
namespace detail
{
struct state_generator;
@@ -45,6 +47,24 @@ struct state
[[nodiscard]] const std::string &
get_string(const std::string &axis_name) const;
void set_items_processed_per_launch(nvbench::int64_t items)
{
m_items_processed_per_launch = items;
}
nvbench::int64_t get_items_processed_per_launch() const
{
return m_items_processed_per_launch;
}
void set_global_bytes_accessed_per_launch(nvbench::int64_t bytes)
{
m_global_bytes_accessed_per_launch = bytes;
}
nvbench::int64_t get_global_bytes_accessed_per_launch() const
{
return m_global_bytes_accessed_per_launch;
}
void skip(std::string reason) { m_skip_reason = std::move(reason); }
[[nodiscard]] bool is_skipped() const { return !m_skip_reason.empty(); }
[[nodiscard]] const std::string &get_skip_reason() const
@@ -57,6 +77,8 @@ struct state
return m_axis_values;
}
const benchmark_base &get_benchmark() const { return m_benchmark; }
summary &add_summary(std::string summary_name);
summary &add_summary(summary s);
[[nodiscard]] const summary &get_summary(std::string_view name) const;
@@ -67,15 +89,21 @@ struct state
protected:
friend struct nvbench::detail::state_generator;
state() = default;
state(nvbench::named_values values)
: m_axis_values{std::move(values)}
explicit state(const benchmark_base &bench)
: m_benchmark{bench}
{}
state(const benchmark_base &bench, nvbench::named_values values)
: m_benchmark{bench}
, m_axis_values{std::move(values)}
{}
const nvbench::benchmark_base &m_benchmark;
nvbench::named_values m_axis_values;
std::vector<nvbench::summary> m_summaries;
std::string m_skip_reason;
nvbench::int64_t m_items_processed_per_launch{};
nvbench::int64_t m_global_bytes_accessed_per_launch{};
};
} // namespace nvbench

View File

@@ -1,15 +1,22 @@
#include <nvbench/state.cuh>
#include <nvbench/benchmark.cuh>
#include <nvbench/callable.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
// Mock up a benchmark for testing:
void dummy_generator(nvbench::state &) {}
NVBENCH_DEFINE_CALLABLE(dummy_generator, dummy_callable);
using dummy_bench = nvbench::benchmark<dummy_callable>;
// Subclass to gain access to protected members for testing:
struct state_tester : public nvbench::state
{
state_tester()
: nvbench::state()
state_tester(const nvbench::benchmark_base& bench)
: nvbench::state{bench}
{}
template <typename T>
@@ -23,8 +30,10 @@ struct state_tester : public nvbench::state
void test_params()
{
dummy_bench bench;
// Build a state param by param
state_tester state;
state_tester state{bench};
state.set_param("TestInt", nvbench::int64_t{22});
state.set_param("TestFloat", nvbench::float64_t{3.14});
state.set_param("TestString", "A String!");
@@ -36,7 +45,8 @@ void test_params()
void test_summaries()
{
state_tester state;
dummy_bench bench;
state_tester state{bench};
ASSERT(state.get_summaries().size() == 0);
{

View File

@@ -2,11 +2,27 @@
#include <nvbench/axes_metadata.cuh>
#include <nvbench/axis_base.cuh>
#include <nvbench/benchmark.cuh>
#include <nvbench/callable.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h>
// Mock up a benchmark for testing:
void dummy_generator(nvbench::state &) {}
NVBENCH_DEFINE_CALLABLE(dummy_generator, dummy_callable);
using dummy_bench = nvbench::benchmark<dummy_callable>;
using floats = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>;
using ints = nvbench::type_list<nvbench::int32_t, nvbench::int64_t>;
using misc = nvbench::type_list<void, bool>;
using type_axes = nvbench::type_list<floats, ints, misc>;
template <typename F, typename I, typename M>
void template_generator(nvbench::state &, nvbench::type_list<F, I, M>){};
NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_generator, template_callable);
using template_bench = nvbench::benchmark<template_callable, type_axes>;
struct state_generator_tester : nvbench::detail::state_generator
{
using nvbench::detail::state_generator::add_axis;
@@ -124,16 +140,16 @@ void test_basic()
void test_create()
{
nvbench::axes_metadata axes;
axes.add_float64_axis("Radians", {3.14, 6.28});
axes.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
axes.add_int64_axis("NumInputs",
{10, 15, 20},
nvbench::int64_axis_flags::power_of_two);
axes.add_string_axis("Strategy", {"Recursive", "Iterative"});
dummy_bench bench;
bench.add_float64_axis("Radians", {3.14, 6.28});
bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
bench.add_int64_axis("NumInputs",
{10, 15, 20},
nvbench::int64_axis_flags::power_of_two);
bench.add_string_axis("Strategy", {"Recursive", "Iterative"});
const std::vector<std::vector<nvbench::state>> states =
nvbench::detail::state_generator::create(axes);
nvbench::detail::state_generator::create(bench);
// Outer vector has one entry per type_config. There are no type axes, so
// there's only one type_config:
@@ -161,7 +177,7 @@ void test_create()
"Strategy");
std::size_t type_config = 0;
std::size_t config = 0;
std::size_t config = 0;
for (const auto &inner_states : states)
{
for (const nvbench::state &state : inner_states)
@@ -224,26 +240,19 @@ void test_create()
fmt::format("Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test));
}
void test_create_with_types()
{
using floats = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>;
using ints = nvbench::type_list<nvbench::int32_t, nvbench::int64_t>;
using misc = nvbench::type_list<void, bool>;
using type_axes = nvbench::type_list<floats, ints, misc>;
nvbench::axes_metadata axes;
axes.set_type_axes_names<type_axes>({"Floats", "Ints", "Misc"});
axes.add_float64_axis("Radians", {3.14, 6.28});
axes.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
axes.add_int64_axis("NumInputs",
{10, 15, 20},
nvbench::int64_axis_flags::power_of_two);
axes.add_string_axis("Strategy", {"Recursive", "Iterative"});
template_bench bench;
bench.set_type_axes_names({"Floats", "Ints", "Misc"});
bench.add_float64_axis("Radians", {3.14, 6.28});
bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
bench.add_int64_axis("NumInputs",
{10, 15, 20},
nvbench::int64_axis_flags::power_of_two);
bench.add_string_axis("Strategy", {"Recursive", "Iterative"});
const std::vector<std::vector<nvbench::state>> states =
nvbench::detail::state_generator::create(axes);
nvbench::detail::state_generator::create(bench);
// Outer vector has one entry per type_config
// 2 (Floats) * 2 (Ints) * 2 (Misc) = 8 total type_configs
@@ -257,8 +266,8 @@ void test_create_with_types()
}
fmt::memory_buffer buffer;
std::string table_format =
"| {:^5} | {:^10} | {:^6} | {:^4} | {:^4} | {:^7} | {:^7} | {:^9} | {:^9} |\n";
std::string table_format = "| {:^5} | {:^10} | {:^6} | {:^4} | {:^4} | {:^7} "
"| {:^7} | {:^9} | {:^9} |\n";
fmt::format_to(buffer, "\n");
fmt::format_to(buffer,
@@ -274,7 +283,7 @@ void test_create_with_types()
"Strategy");
std::size_t type_config = 0;
std::size_t config = 0;
std::size_t config = 0;
for (const auto &inner_states : states)
{
for (const nvbench::state &state : inner_states)