mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-04-20 06:48:53 +00:00
Polishing up the proof of concept.
- Enable throughput stats
- Add cold measurements
- Print liveness/log messages while running trials.
- Improve markdown formatting
- Add nvbench::range
This commit is contained in:
@@ -12,6 +12,7 @@ set(srcs
|
||||
type_axis.cu
|
||||
|
||||
detail/markdown_format.cu
|
||||
detail/measure_cold.cu
|
||||
detail/measure_hot.cu
|
||||
detail/state_generator.cu
|
||||
)
|
||||
|
||||
49
nvbench/detail/l2flush.cuh
Normal file
49
nvbench/detail/l2flush.cuh
Normal file
@@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
|
||||
struct l2flush
|
||||
{
|
||||
l2flush()
|
||||
: m_l2_buffer{nullptr}
|
||||
{
|
||||
int dev_id{};
|
||||
NVBENCH_CUDA_CALL(cudaGetDevice(&dev_id));
|
||||
NVBENCH_CUDA_CALL(
|
||||
cudaDeviceGetAttribute(&m_l2_size, cudaDevAttrL2CacheSize, dev_id));
|
||||
if (m_l2_size > 0)
|
||||
{
|
||||
NVBENCH_CUDA_CALL(cudaMalloc(&m_l2_buffer, m_l2_size));
|
||||
}
|
||||
}
|
||||
|
||||
~l2flush()
|
||||
{
|
||||
if (m_l2_buffer)
|
||||
{
|
||||
NVBENCH_CUDA_CALL(cudaFree(m_l2_buffer));
|
||||
}
|
||||
}
|
||||
|
||||
void flush(cudaStream_t stream)
|
||||
{
|
||||
if (m_l2_size > 0)
|
||||
{
|
||||
NVBENCH_CUDA_CALL(cudaMemsetAsync(m_l2_buffer, 0, m_l2_size, stream));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int m_l2_size;
|
||||
int *m_l2_buffer;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
} // namespace nvbench
|
||||
@@ -124,7 +124,7 @@ void markdown_format::print()
|
||||
using T = std::decay_t<decltype(v)>;
|
||||
if constexpr (std::is_same_v<T, nvbench::float64_t>)
|
||||
{
|
||||
return fmt::format("{:7.5g}", v);
|
||||
return fmt::format("{:.5g}", v);
|
||||
}
|
||||
else if constexpr (std::is_same_v<T, std::string>)
|
||||
{
|
||||
@@ -136,22 +136,50 @@ void markdown_format::print()
|
||||
auto format_duration = [](nvbench::float64_t seconds) {
|
||||
if (seconds >= 1.) // 1+ sec
|
||||
{
|
||||
return fmt::format("{:5.2f} s", seconds);
|
||||
return fmt::format("{:.2f} s", seconds);
|
||||
}
|
||||
else if (seconds >= 1e-1) // 100+ ms.
|
||||
else if (seconds >= 1e-2) // 10+ ms.
|
||||
{
|
||||
return fmt::format("{:5.2f} ms", seconds * 1e3);
|
||||
return fmt::format("{:.2f} ms", seconds * 1e3);
|
||||
}
|
||||
else if (seconds >= 1e-4) // 100+ us.
|
||||
else if (seconds >= 1e-5) // 10+ us.
|
||||
{
|
||||
return fmt::format("{:5.2f} us", seconds * 1e6);
|
||||
return fmt::format("{:.2f} us", seconds * 1e6);
|
||||
}
|
||||
else
|
||||
{
|
||||
return fmt::format("{:5.2f} ns", seconds * 1e9);
|
||||
return fmt::format("{:.2f} ns", seconds * 1e9);
|
||||
}
|
||||
};
|
||||
|
||||
auto format_item_rate = [](nvbench::float64_t items_per_second) {
|
||||
return fmt::format("{:.3g}/sec", items_per_second);
|
||||
};
|
||||
|
||||
auto format_byte_rate = [](nvbench::float64_t bytes_per_second) {
|
||||
if (bytes_per_second >= 10. * 1024. * 1024. * 1024.) // 10 GiB/s
|
||||
{
|
||||
return fmt::format("{:.2f} GiB/s",
|
||||
bytes_per_second / (1024. * 1024. * 1024.));
|
||||
}
|
||||
else if (bytes_per_second >= 10. * 1024. * 1024.) // 10 MiB/s
|
||||
{
|
||||
return fmt::format("{:.2f} MiB/s", bytes_per_second / (1024. * 1024.));
|
||||
}
|
||||
else if (bytes_per_second >= 10. * 1024.) // 10 KiB/s.
|
||||
{
|
||||
return fmt::format("{:.2f} KiB/s", bytes_per_second / 1024.);
|
||||
}
|
||||
else
|
||||
{
|
||||
return fmt::format("{:.2f} B/s", bytes_per_second);
|
||||
}
|
||||
};
|
||||
|
||||
auto format_percentage = [](nvbench::float64_t percentage) {
|
||||
return fmt::format("{:.2f}%", percentage);
|
||||
};
|
||||
|
||||
auto &mgr = nvbench::benchmark_manager::get();
|
||||
for (const auto &bench_ptr : mgr.get_benchmarks())
|
||||
{
|
||||
@@ -178,12 +206,32 @@ void markdown_format::print()
|
||||
? summ.get_string("short_name")
|
||||
: summ.get_name();
|
||||
|
||||
if (summ.has_value("hint") && summ.get_string("hint") == "duration")
|
||||
std::string hint = summ.has_value("hint") ? summ.get_string("hint")
|
||||
: std::string{};
|
||||
if (hint == "duration")
|
||||
{
|
||||
table.add_cell(row,
|
||||
name,
|
||||
format_duration(summ.get_float64("value")));
|
||||
}
|
||||
else if (hint == "item_rate")
|
||||
{
|
||||
table.add_cell(row,
|
||||
name,
|
||||
format_item_rate(summ.get_float64("value")));
|
||||
}
|
||||
else if (hint == "byte_rate")
|
||||
{
|
||||
table.add_cell(row,
|
||||
name,
|
||||
format_byte_rate(summ.get_float64("value")));
|
||||
}
|
||||
else if (hint == "percentage")
|
||||
{
|
||||
table.add_cell(row,
|
||||
name,
|
||||
format_percentage(summ.get_float64("value")));
|
||||
}
|
||||
else
|
||||
{
|
||||
table.add_cell(row,
|
||||
|
||||
81
nvbench/detail/measure_cold.cu
Normal file
81
nvbench/detail/measure_cold.cu
Normal file
@@ -0,0 +1,81 @@
|
||||
#include <nvbench/detail/measure_cold.cuh>
|
||||
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <variant>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
void measure_cold_base::initialize()
|
||||
{
|
||||
m_cuda_time = 0.;
|
||||
m_cpu_time = 0.;
|
||||
m_num_trials = 0;
|
||||
}
|
||||
|
||||
void measure_cold_base::generate_summaries()
|
||||
{
|
||||
{
|
||||
auto &summ = m_state.add_summary("Number of Trials (Cold)");
|
||||
summ.set_string("short_name", "Cold Trials");
|
||||
summ.set_string("description",
|
||||
"Number of kernel executions in cold time measurements.");
|
||||
summ.set_int64("value", m_num_trials);
|
||||
}
|
||||
|
||||
const auto avg_cuda_time = m_cuda_time / m_num_trials;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average GPU Time (Cold)");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("short_name", "Cold GPU");
|
||||
summ.set_string("description",
|
||||
"Average isolated kernel execution time as measured "
|
||||
"by CUDA events.");
|
||||
summ.set_float64("value", avg_cuda_time);
|
||||
}
|
||||
|
||||
const auto avg_cpu_time = m_cpu_time / m_num_trials;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average CPU Time (Cold)");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("short_name", "Cold CPU");
|
||||
summ.set_string("description",
|
||||
"Average isolated kernel execution time observed "
|
||||
"from host.");
|
||||
summ.set_float64("value", avg_cpu_time);
|
||||
}
|
||||
|
||||
// Log to stdout:
|
||||
fmt::memory_buffer param_buffer;
|
||||
fmt::format_to(param_buffer, "");
|
||||
const auto &axis_values = m_state.get_axis_values();
|
||||
for (const auto &name : axis_values.get_names())
|
||||
{
|
||||
fmt::format_to(param_buffer, "{}=", name);
|
||||
std::visit([¶m_buffer](
|
||||
const auto &val) { fmt::format_to(param_buffer, "{} ", val); },
|
||||
axis_values.get_value(name));
|
||||
}
|
||||
|
||||
fmt::print("Benchmark {} Params: [ {}] Cold {:.6f} ms GPU, {:.6f} ms CPU, "
|
||||
"{}x\n",
|
||||
m_state.get_benchmark().get_name(),
|
||||
fmt::to_string(param_buffer),
|
||||
avg_cuda_time * 1e3,
|
||||
avg_cpu_time * 1e3,
|
||||
m_num_trials);
|
||||
std::fflush(stdout);
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
} // namespace nvbench
|
||||
108
nvbench/detail/measure_cold.cuh
Normal file
108
nvbench/detail/measure_cold.cuh
Normal file
@@ -0,0 +1,108 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/cpu_timer.cuh>
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/cuda_timer.cuh>
|
||||
#include <nvbench/launch.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
|
||||
#include <nvbench/detail/l2flush.cuh>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
// non-templated code goes here:
|
||||
struct measure_cold_base
|
||||
{
|
||||
explicit measure_cold_base(nvbench::state &exec_state)
|
||||
: m_state(exec_state)
|
||||
{}
|
||||
measure_cold_base(const measure_cold_base &) = delete;
|
||||
measure_cold_base(measure_cold_base &&) = delete;
|
||||
measure_cold_base &operator=(const measure_cold_base &) = delete;
|
||||
measure_cold_base &operator=(measure_cold_base &&) = delete;
|
||||
|
||||
protected:
|
||||
void initialize();
|
||||
|
||||
void generate_summaries();
|
||||
|
||||
nvbench::launch m_launch{};
|
||||
nvbench::cuda_timer m_cuda_timer{};
|
||||
nvbench::cpu_timer m_cpu_timer{};
|
||||
nvbench::detail::l2flush m_l2flush{};
|
||||
|
||||
// seconds:
|
||||
nvbench::float64_t m_min_time{1.};
|
||||
nvbench::float64_t m_cuda_time{};
|
||||
nvbench::float64_t m_cpu_time{};
|
||||
|
||||
nvbench::int64_t m_num_trials{};
|
||||
|
||||
nvbench::state &m_state;
|
||||
};
|
||||
|
||||
template <typename KernelLauncher>
|
||||
struct measure_cold : public measure_cold_base
|
||||
{
|
||||
measure_cold(nvbench::state &state, KernelLauncher &kernel_launcher)
|
||||
: measure_cold_base(state)
|
||||
, m_kernel_launcher{kernel_launcher}
|
||||
{}
|
||||
|
||||
void operator()()
|
||||
{
|
||||
this->initialize();
|
||||
this->run_warmup();
|
||||
this->run_trials();
|
||||
this->generate_summaries();
|
||||
}
|
||||
|
||||
private:
|
||||
void run_warmup()
|
||||
{
|
||||
m_l2flush.flush(m_launch.get_stream());
|
||||
this->launch_kernel();
|
||||
NVBENCH_CUDA_CALL(cudaStreamSynchronize(m_launch.get_stream()));
|
||||
}
|
||||
|
||||
void run_trials()
|
||||
{
|
||||
do
|
||||
{
|
||||
m_l2flush.flush(m_launch.get_stream());
|
||||
NVBENCH_CUDA_CALL(cudaStreamSynchronize(m_launch.get_stream()));
|
||||
|
||||
m_cuda_timer.start(m_launch.get_stream());
|
||||
m_cpu_timer.start();
|
||||
|
||||
this->launch_kernel();
|
||||
|
||||
m_cuda_timer.stop(m_launch.get_stream());
|
||||
|
||||
NVBENCH_CUDA_CALL(cudaStreamSynchronize(m_launch.get_stream()));
|
||||
m_cpu_timer.stop();
|
||||
|
||||
// TODO eventually these should also get logged in a vector for
|
||||
// statistical analysis.
|
||||
m_cuda_time += m_cuda_timer.get_duration();
|
||||
m_cpu_time += m_cpu_timer.get_duration();
|
||||
++m_num_trials;
|
||||
} while (std::max(m_cuda_time, m_cpu_time) < m_min_time);
|
||||
}
|
||||
|
||||
// TODO forceinline
|
||||
void launch_kernel() { m_kernel_launcher(m_launch); }
|
||||
|
||||
KernelLauncher &m_kernel_launcher;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
} // namespace nvbench
|
||||
@@ -1,8 +1,18 @@
|
||||
#include <nvbench/detail/measure_hot.cuh>

#include <nvbench/benchmark_base.cuh>
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>

#include <fmt/format.h>

#include <climits> // CHAR_BIT, used when computing peak memory bandwidth.
#include <cstdio>
#include <variant>

// note that these can be removed once there's a device_manager or some such:
#include <cuda_runtime_api.h>
#include <nvbench/cuda_call.cuh>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
@@ -12,32 +22,98 @@ namespace detail
|
||||
void measure_hot_base::generate_summaries()
|
||||
{
|
||||
{
|
||||
auto &summary = m_state.add_summary("Number of Trials (Hot)");
|
||||
summary.set_string("short_name", "Hot Trials");
|
||||
summary.set_string("description",
|
||||
"Number of kernel executions in hot time measurements.");
|
||||
summary.set_int64("value", m_num_trials);
|
||||
auto &summ = m_state.add_summary("Number of Trials (Hot)");
|
||||
summ.set_string("short_name", "Hot Trials");
|
||||
summ.set_string("description",
|
||||
"Number of kernel executions in hot time measurements.");
|
||||
summ.set_int64("value", m_num_trials);
|
||||
}
|
||||
|
||||
const auto avg_cuda_time = m_cuda_time / m_num_trials;
|
||||
{
|
||||
auto &summary = m_state.add_summary("Average GPU Time (Hot)");
|
||||
summary.set_string("hint", "duration");
|
||||
summary.set_string("short_name", "Hot GPU");
|
||||
summary.set_string("description",
|
||||
"Average back-to-back kernel execution time as measured "
|
||||
"by CUDA events.");
|
||||
summary.set_float64("value", m_cuda_time / m_num_trials);
|
||||
auto &summ = m_state.add_summary("Average GPU Time (Hot)");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("short_name", "Hot GPU");
|
||||
summ.set_string("description",
|
||||
"Average back-to-back kernel execution time as measured "
|
||||
"by CUDA events.");
|
||||
summ.set_float64("value", avg_cuda_time);
|
||||
}
|
||||
|
||||
const auto avg_cpu_time = m_cpu_time / m_num_trials;
|
||||
{
|
||||
auto &summary = m_state.add_summary("Average CPU Time (Hot)");
|
||||
summary.set_string("hint", "duration");
|
||||
summary.set_string("short_name", "Hot CPU");
|
||||
summary.set_string("description",
|
||||
"Average back-to-back kernel execution time observed "
|
||||
"from host.");
|
||||
summary.set_float64("value", m_cpu_time / m_num_trials);
|
||||
auto &summ = m_state.add_summary("Average CPU Time (Hot)");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("short_name", "Hot CPU");
|
||||
summ.set_string("description",
|
||||
"Average back-to-back kernel execution time observed "
|
||||
"from host.");
|
||||
summ.set_float64("value", avg_cpu_time);
|
||||
}
|
||||
|
||||
if (const auto items = m_state.get_items_processed_per_launch(); items != 0)
|
||||
{
|
||||
auto &summ = m_state.add_summary("Item Throughput");
|
||||
summ.set_string("hint", "item_rate");
|
||||
summ.set_string("short_name", "Item Rate");
|
||||
summ.set_string("description", "Number of input items handled per second.");
|
||||
summ.set_float64("value", items / avg_cuda_time);
|
||||
}
|
||||
|
||||
if (const auto bytes = m_state.get_global_bytes_accessed_per_launch();
|
||||
bytes != 0)
|
||||
{
|
||||
const auto avg_used_gmem_bw = bytes / avg_cuda_time;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average Global Memory Throughput");
|
||||
summ.set_string("hint", "byte_rate");
|
||||
summ.set_string("short_name", "GlobalMemUse");
|
||||
summ.set_string("description",
|
||||
"Number of bytes read/written per second to the CUDA "
|
||||
"device's global memory.");
|
||||
summ.set_float64("value", avg_used_gmem_bw);
|
||||
}
|
||||
|
||||
// TODO cache this in a singleton somewhere.
|
||||
int dev_id{};
|
||||
cudaDeviceProp prop{};
|
||||
NVBENCH_CUDA_CALL(cudaGetDevice(&dev_id));
|
||||
NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&prop, dev_id));
|
||||
// clock rate in khz, width in bits. Result in bytes/sec.
|
||||
const auto peak_gmem_bw = 2 * 1000. * prop.memoryClockRate * // (sec^-1)
|
||||
prop.memoryBusWidth / CHAR_BIT; // bytes
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("Percent Peak Global Memory Throughput");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("short_name", "PeakGMem");
|
||||
summ.set_string("description",
|
||||
"Global device memory throughput as a percentage of the "
|
||||
"device's peak bandwidth.");
|
||||
summ.set_float64("value", avg_used_gmem_bw / peak_gmem_bw * 100.);
|
||||
}
|
||||
}
|
||||
|
||||
// Log to stdout:
|
||||
fmt::memory_buffer param_buffer;
|
||||
fmt::format_to(param_buffer, "");
|
||||
const auto &axis_values = m_state.get_axis_values();
|
||||
for (const auto &name : axis_values.get_names())
|
||||
{
|
||||
fmt::format_to(param_buffer, "{}=", name);
|
||||
std::visit([¶m_buffer](
|
||||
const auto &val) { fmt::format_to(param_buffer, "{} ", val); },
|
||||
axis_values.get_value(name));
|
||||
}
|
||||
|
||||
fmt::print("Benchmark {} Params: [ {}] Hot {:.6f} ms GPU, {:.6f} ms CPU, "
|
||||
"{}x\n",
|
||||
m_state.get_benchmark().get_name(),
|
||||
fmt::to_string(param_buffer),
|
||||
avg_cuda_time * 1e3,
|
||||
avg_cpu_time * 1e3,
|
||||
m_num_trials);
|
||||
std::fflush(stdout);
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include <nvbench/detail/state_generator.cuh>
|
||||
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/named_values.cuh>
|
||||
#include <nvbench/type_axis.cuh>
|
||||
|
||||
@@ -18,7 +19,7 @@ namespace detail
|
||||
{
|
||||
|
||||
std::vector<std::vector<nvbench::state>>
|
||||
state_generator::create(const axes_metadata &axes)
|
||||
state_generator::create(const benchmark_base &bench)
|
||||
{
|
||||
// Assemble states into a std::vector<std::vector<nvbench::state>>, where the
|
||||
// outer vector has one inner vector per type_config, and all configs in an
|
||||
@@ -27,6 +28,7 @@ state_generator::create(const axes_metadata &axes)
|
||||
// matching up states to kernel_generator instantiations much easier during
|
||||
// dispatch.
|
||||
|
||||
const axes_metadata& axes = bench.get_axes();
|
||||
// vector of all axes:
|
||||
const std::vector<std::unique_ptr<axis_base>> &axes_vec = axes.get_axes();
|
||||
|
||||
@@ -99,7 +101,7 @@ state_generator::create(const axes_metadata &axes)
|
||||
for (non_type_sg.init(); non_type_sg.iter_valid(); non_type_sg.next())
|
||||
{
|
||||
// Initialize each state with the current type_config:
|
||||
nvbench::state state{type_config};
|
||||
nvbench::state state{bench, type_config};
|
||||
// Add non-type parameters to state:
|
||||
for (const axis_index &axis_info : non_type_sg.get_current_indices())
|
||||
{
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
struct benchmark_base;
|
||||
namespace detail
|
||||
{
|
||||
|
||||
@@ -17,7 +17,7 @@ struct state_generator
|
||||
{
|
||||
|
||||
static std::vector<std::vector<nvbench::state>>
|
||||
create(const axes_metadata &axes);
|
||||
create(const benchmark_base &bench);
|
||||
|
||||
protected:
|
||||
struct axis_index
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/detail/measure_cold.cuh>
|
||||
#include <nvbench/detail/measure_hot.cuh>
|
||||
|
||||
namespace nvbench
|
||||
@@ -9,8 +10,16 @@ template <typename KernelLauncher>
|
||||
void exec(nvbench::state &exec_state, KernelLauncher &&kernel_launcher)
|
||||
{
|
||||
using KL = std::remove_reference_t<KernelLauncher>;
|
||||
nvbench::detail::measure_hot<KL> hot{exec_state, kernel_launcher};
|
||||
hot();
|
||||
|
||||
{
|
||||
nvbench::detail::measure_cold<KL> cold{exec_state, kernel_launcher};
|
||||
cold();
|
||||
}
|
||||
|
||||
{
|
||||
nvbench::detail::measure_hot<KL> hot{exec_state, kernel_launcher};
|
||||
hot();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace nvbench
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <nvbench/exec.cuh>
|
||||
#include <nvbench/launch.cuh>
|
||||
#include <nvbench/main.cuh>
|
||||
#include <nvbench/range.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/type_list.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
32
nvbench/range.cuh
Normal file
32
nvbench/range.cuh
Normal file
@@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template <typename T>
|
||||
using range_output_t = std::conditional_t<std::is_floating_point_v<T>,
|
||||
nvbench::float64_t,
|
||||
nvbench::int64_t>;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto range(T start, T end, T stride = T{1})
|
||||
{
|
||||
using output_t = detail::range_output_t<T>;
|
||||
using result_t = std::vector<output_t>;
|
||||
result_t result;
|
||||
for (; start <= end; start += stride)
|
||||
{
|
||||
result.push_back(static_cast<output_t>(start));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace nvbench
|
||||
@@ -24,7 +24,7 @@ struct runner
|
||||
void generate_states()
|
||||
{
|
||||
m_benchmark.m_states =
|
||||
nvbench::detail::state_generator::create(m_benchmark.m_axes);
|
||||
nvbench::detail::state_generator::create(m_benchmark);
|
||||
}
|
||||
|
||||
void run()
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
struct benchmark_base;
|
||||
|
||||
namespace detail
|
||||
{
|
||||
struct state_generator;
|
||||
@@ -45,6 +47,24 @@ struct state
|
||||
[[nodiscard]] const std::string &
|
||||
get_string(const std::string &axis_name) const;
|
||||
|
||||
void set_items_processed_per_launch(nvbench::int64_t items)
|
||||
{
|
||||
m_items_processed_per_launch = items;
|
||||
}
|
||||
nvbench::int64_t get_items_processed_per_launch() const
|
||||
{
|
||||
return m_items_processed_per_launch;
|
||||
}
|
||||
|
||||
void set_global_bytes_accessed_per_launch(nvbench::int64_t bytes)
|
||||
{
|
||||
m_global_bytes_accessed_per_launch = bytes;
|
||||
}
|
||||
nvbench::int64_t get_global_bytes_accessed_per_launch() const
|
||||
{
|
||||
return m_global_bytes_accessed_per_launch;
|
||||
}
|
||||
|
||||
void skip(std::string reason) { m_skip_reason = std::move(reason); }
|
||||
[[nodiscard]] bool is_skipped() const { return !m_skip_reason.empty(); }
|
||||
[[nodiscard]] const std::string &get_skip_reason() const
|
||||
@@ -57,6 +77,8 @@ struct state
|
||||
return m_axis_values;
|
||||
}
|
||||
|
||||
const benchmark_base &get_benchmark() const { return m_benchmark; }
|
||||
|
||||
summary &add_summary(std::string summary_name);
|
||||
summary &add_summary(summary s);
|
||||
[[nodiscard]] const summary &get_summary(std::string_view name) const;
|
||||
@@ -67,15 +89,21 @@ struct state
|
||||
protected:
|
||||
friend struct nvbench::detail::state_generator;
|
||||
|
||||
state() = default;
|
||||
|
||||
state(nvbench::named_values values)
|
||||
: m_axis_values{std::move(values)}
|
||||
explicit state(const benchmark_base &bench)
|
||||
: m_benchmark{bench}
|
||||
{}
|
||||
|
||||
state(const benchmark_base &bench, nvbench::named_values values)
|
||||
: m_benchmark{bench}
|
||||
, m_axis_values{std::move(values)}
|
||||
{}
|
||||
|
||||
const nvbench::benchmark_base &m_benchmark;
|
||||
nvbench::named_values m_axis_values;
|
||||
std::vector<nvbench::summary> m_summaries;
|
||||
std::string m_skip_reason;
|
||||
nvbench::int64_t m_items_processed_per_launch{};
|
||||
nvbench::int64_t m_global_bytes_accessed_per_launch{};
|
||||
};
|
||||
|
||||
} // namespace nvbench
|
||||
|
||||
@@ -1,15 +1,22 @@
|
||||
#include <nvbench/state.cuh>
|
||||
|
||||
#include <nvbench/benchmark.cuh>
|
||||
#include <nvbench/callable.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
// Mock up a benchmark for testing:
|
||||
void dummy_generator(nvbench::state &) {}
|
||||
NVBENCH_DEFINE_CALLABLE(dummy_generator, dummy_callable);
|
||||
using dummy_bench = nvbench::benchmark<dummy_callable>;
|
||||
|
||||
// Subclass to gain access to protected members for testing:
|
||||
struct state_tester : public nvbench::state
|
||||
{
|
||||
state_tester()
|
||||
: nvbench::state()
|
||||
state_tester(const nvbench::benchmark_base& bench)
|
||||
: nvbench::state{bench}
|
||||
{}
|
||||
|
||||
template <typename T>
|
||||
@@ -23,8 +30,10 @@ struct state_tester : public nvbench::state
|
||||
|
||||
void test_params()
|
||||
{
|
||||
dummy_bench bench;
|
||||
|
||||
// Build a state param by param
|
||||
state_tester state;
|
||||
state_tester state{bench};
|
||||
state.set_param("TestInt", nvbench::int64_t{22});
|
||||
state.set_param("TestFloat", nvbench::float64_t{3.14});
|
||||
state.set_param("TestString", "A String!");
|
||||
@@ -36,7 +45,8 @@ void test_params()
|
||||
|
||||
void test_summaries()
|
||||
{
|
||||
state_tester state;
|
||||
dummy_bench bench;
|
||||
state_tester state{bench};
|
||||
ASSERT(state.get_summaries().size() == 0);
|
||||
|
||||
{
|
||||
|
||||
@@ -2,11 +2,27 @@
|
||||
|
||||
#include <nvbench/axes_metadata.cuh>
|
||||
#include <nvbench/axis_base.cuh>
|
||||
#include <nvbench/benchmark.cuh>
|
||||
#include <nvbench/callable.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
// Mock up a benchmark for testing:
|
||||
void dummy_generator(nvbench::state &) {}
|
||||
NVBENCH_DEFINE_CALLABLE(dummy_generator, dummy_callable);
|
||||
using dummy_bench = nvbench::benchmark<dummy_callable>;
|
||||
|
||||
using floats = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>;
|
||||
using ints = nvbench::type_list<nvbench::int32_t, nvbench::int64_t>;
|
||||
using misc = nvbench::type_list<void, bool>;
|
||||
using type_axes = nvbench::type_list<floats, ints, misc>;
|
||||
template <typename F, typename I, typename M>
|
||||
void template_generator(nvbench::state &, nvbench::type_list<F, I, M>){};
|
||||
NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_generator, template_callable);
|
||||
using template_bench = nvbench::benchmark<template_callable, type_axes>;
|
||||
|
||||
struct state_generator_tester : nvbench::detail::state_generator
|
||||
{
|
||||
using nvbench::detail::state_generator::add_axis;
|
||||
@@ -124,16 +140,16 @@ void test_basic()
|
||||
|
||||
void test_create()
|
||||
{
|
||||
nvbench::axes_metadata axes;
|
||||
axes.add_float64_axis("Radians", {3.14, 6.28});
|
||||
axes.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
|
||||
axes.add_int64_axis("NumInputs",
|
||||
{10, 15, 20},
|
||||
nvbench::int64_axis_flags::power_of_two);
|
||||
axes.add_string_axis("Strategy", {"Recursive", "Iterative"});
|
||||
dummy_bench bench;
|
||||
bench.add_float64_axis("Radians", {3.14, 6.28});
|
||||
bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
|
||||
bench.add_int64_axis("NumInputs",
|
||||
{10, 15, 20},
|
||||
nvbench::int64_axis_flags::power_of_two);
|
||||
bench.add_string_axis("Strategy", {"Recursive", "Iterative"});
|
||||
|
||||
const std::vector<std::vector<nvbench::state>> states =
|
||||
nvbench::detail::state_generator::create(axes);
|
||||
nvbench::detail::state_generator::create(bench);
|
||||
|
||||
// Outer vector has one entry per type_config. There are no type axes, so
|
||||
// there's only one type_config:
|
||||
@@ -161,7 +177,7 @@ void test_create()
|
||||
"Strategy");
|
||||
|
||||
std::size_t type_config = 0;
|
||||
std::size_t config = 0;
|
||||
std::size_t config = 0;
|
||||
for (const auto &inner_states : states)
|
||||
{
|
||||
for (const nvbench::state &state : inner_states)
|
||||
@@ -224,26 +240,19 @@ void test_create()
|
||||
fmt::format("Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test));
|
||||
}
|
||||
|
||||
|
||||
void test_create_with_types()
|
||||
{
|
||||
using floats = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>;
|
||||
using ints = nvbench::type_list<nvbench::int32_t, nvbench::int64_t>;
|
||||
using misc = nvbench::type_list<void, bool>;
|
||||
|
||||
using type_axes = nvbench::type_list<floats, ints, misc>;
|
||||
|
||||
nvbench::axes_metadata axes;
|
||||
axes.set_type_axes_names<type_axes>({"Floats", "Ints", "Misc"});
|
||||
axes.add_float64_axis("Radians", {3.14, 6.28});
|
||||
axes.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
|
||||
axes.add_int64_axis("NumInputs",
|
||||
{10, 15, 20},
|
||||
nvbench::int64_axis_flags::power_of_two);
|
||||
axes.add_string_axis("Strategy", {"Recursive", "Iterative"});
|
||||
template_bench bench;
|
||||
bench.set_type_axes_names({"Floats", "Ints", "Misc"});
|
||||
bench.add_float64_axis("Radians", {3.14, 6.28});
|
||||
bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none);
|
||||
bench.add_int64_axis("NumInputs",
|
||||
{10, 15, 20},
|
||||
nvbench::int64_axis_flags::power_of_two);
|
||||
bench.add_string_axis("Strategy", {"Recursive", "Iterative"});
|
||||
|
||||
const std::vector<std::vector<nvbench::state>> states =
|
||||
nvbench::detail::state_generator::create(axes);
|
||||
nvbench::detail::state_generator::create(bench);
|
||||
|
||||
// Outer vector has one entry per type_config
|
||||
// 2 (Floats) * 2 (Ints) * 2 (Misc) = 8 total type_configs
|
||||
@@ -257,8 +266,8 @@ void test_create_with_types()
|
||||
}
|
||||
|
||||
fmt::memory_buffer buffer;
|
||||
std::string table_format =
|
||||
"| {:^5} | {:^10} | {:^6} | {:^4} | {:^4} | {:^7} | {:^7} | {:^9} | {:^9} |\n";
|
||||
std::string table_format = "| {:^5} | {:^10} | {:^6} | {:^4} | {:^4} | {:^7} "
|
||||
"| {:^7} | {:^9} | {:^9} |\n";
|
||||
|
||||
fmt::format_to(buffer, "\n");
|
||||
fmt::format_to(buffer,
|
||||
@@ -274,7 +283,7 @@ void test_create_with_types()
|
||||
"Strategy");
|
||||
|
||||
std::size_t type_config = 0;
|
||||
std::size_t config = 0;
|
||||
std::size_t config = 0;
|
||||
for (const auto &inner_states : states)
|
||||
{
|
||||
for (const nvbench::state &state : inner_states)
|
||||
|
||||
Reference in New Issue
Block a user