mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-04-20 06:48:53 +00:00
Add termination criteria API.
- min_samples
- min_time
- max_noise
- skip_time (not yet implemented)
- timeout

Refactored s/(trials)|(iters)/samples/s.
This commit is contained in:
@@ -16,8 +16,16 @@ std::unique_ptr<benchmark_base> benchmark_base::clone() const
|
||||
auto result = this->do_clone();
|
||||
|
||||
// Do not copy states.
|
||||
result->m_name = m_name;
|
||||
result->m_axes = m_axes;
|
||||
result->m_name = m_name;
|
||||
result->m_axes = m_axes;
|
||||
result->m_devices = m_devices;
|
||||
|
||||
result->m_min_samples = m_min_samples;
|
||||
result->m_min_time = m_min_time;
|
||||
result->m_max_noise = m_max_noise;
|
||||
|
||||
result->m_skip_time = m_skip_time;
|
||||
result->m_timeout = m_timeout;
|
||||
|
||||
return std::move(result);
|
||||
}
|
||||
|
||||
@@ -113,6 +113,67 @@ struct benchmark_base
|
||||
|
||||
void run() { this->do_run(); }
|
||||
|
||||
/// Execute at least this many samples per measurement. @{
|
||||
[[nodiscard]] nvbench::int64_t get_min_samples() const
|
||||
{
|
||||
return m_min_samples;
|
||||
}
|
||||
benchmark_base &set_min_samples(nvbench::int64_t min_samples)
|
||||
{
|
||||
m_min_samples = min_samples;
|
||||
return *this;
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// Accumulate at least this many seconds of timing data per measurement. @{
|
||||
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
|
||||
benchmark_base &set_min_time(nvbench::float64_t min_time)
|
||||
{
|
||||
m_min_time = min_time;
|
||||
return *this;
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// Specify the maximum amount of noise if a measurement supports noise.
|
||||
/// Noise is the relative standard deviation expressed as a percentage:
|
||||
/// `noise = 100 * (stdev / mean_time)`. @{
|
||||
[[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; }
|
||||
benchmark_base &set_max_noise(nvbench::float64_t max_noise)
|
||||
{
|
||||
m_max_noise = max_noise;
|
||||
return *this;
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// If a warmup run finishes in less than `skip_time`, the measurement will
|
||||
/// be skipped.
|
||||
/// Extremely fast kernels (< 5000 ns) often time out before they can
|
||||
/// accumulate `min_time` measurements, and are often uninteresting. Setting
|
||||
/// this value can help improve performance by skipping time consuming
|
||||
/// measurements that don't provide much information.
|
||||
/// Default value is 0, which disables the feature.
|
||||
/// @{
|
||||
[[nodiscard]] nvbench::float64_t get_skip_time() const { return m_skip_time; }
|
||||
benchmark_base &set_skip_time(nvbench::float64_t skip_time)
|
||||
{
|
||||
m_skip_time = skip_time;
|
||||
return *this;
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// If a measurement takes more than `timeout` seconds to complete, stop the
|
||||
/// measurement early. A warning should be printed if this happens.
|
||||
/// This setting overrides all other termination criteria.
|
||||
/// Note that this is measured in CPU walltime, not sample time.
|
||||
/// @{
|
||||
[[nodiscard]] nvbench::float64_t get_timeout() const { return m_timeout; }
|
||||
benchmark_base &set_timeout(nvbench::float64_t timeout)
|
||||
{
|
||||
m_timeout = timeout;
|
||||
return *this;
|
||||
}
|
||||
/// @}
|
||||
|
||||
protected:
|
||||
template <typename BenchmarkType>
|
||||
friend struct runner;
|
||||
@@ -122,6 +183,13 @@ protected:
|
||||
std::vector<nvbench::device_info> m_devices;
|
||||
std::vector<nvbench::state> m_states;
|
||||
|
||||
nvbench::int64_t m_min_samples{10};
|
||||
nvbench::float64_t m_min_time{0.5};
|
||||
nvbench::float64_t m_max_noise{0.5};
|
||||
|
||||
nvbench::float64_t m_skip_time{0.};
|
||||
nvbench::float64_t m_timeout{15.};
|
||||
|
||||
private:
|
||||
// route these through virtuals so the templated subclass can inject type info
|
||||
virtual std::unique_ptr<benchmark_base> do_clone() const = 0;
|
||||
|
||||
@@ -17,6 +17,14 @@ namespace nvbench
|
||||
namespace detail
|
||||
{
|
||||
|
||||
measure_cold_base::measure_cold_base(state &exec_state)
|
||||
: m_state{exec_state}
|
||||
, m_min_samples{exec_state.get_min_samples()}
|
||||
, m_max_noise{exec_state.get_max_noise()}
|
||||
, m_min_time{exec_state.get_min_time()}
|
||||
, m_timeout{exec_state.get_timeout()}
|
||||
{}
|
||||
|
||||
void measure_cold_base::check()
|
||||
{
|
||||
const auto device = m_state.get_device();
|
||||
@@ -38,8 +46,8 @@ void measure_cold_base::check()
|
||||
|
||||
void measure_cold_base::generate_summaries()
|
||||
{
|
||||
const auto d_iters = static_cast<double>(m_total_iters);
|
||||
const auto avg_cuda_time = m_total_cuda_time / d_iters;
|
||||
const auto d_samples = static_cast<double>(m_total_samples);
|
||||
const auto avg_cuda_time = m_total_cuda_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average GPU Time (Cold)");
|
||||
summ.set_string("hint", "duration");
|
||||
@@ -60,7 +68,7 @@ void measure_cold_base::generate_summaries()
|
||||
summ.set_float64("value", m_cuda_noise);
|
||||
}
|
||||
|
||||
const auto avg_cpu_time = m_total_cpu_time / d_iters;
|
||||
const auto avg_cpu_time = m_total_cpu_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average CPU Time (Cold)");
|
||||
summ.set_string("hint", "duration");
|
||||
@@ -82,11 +90,11 @@ void measure_cold_base::generate_summaries()
|
||||
}
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("Number of Trials (Cold)");
|
||||
summ.set_string("short_name", "Trials");
|
||||
auto &summ = m_state.add_summary("Number of Samples (Cold)");
|
||||
summ.set_string("short_name", "Samples");
|
||||
summ.set_string("description",
|
||||
"Number of kernel executions in cold time measurements.");
|
||||
summ.set_int64("value", m_total_iters);
|
||||
summ.set_int64("value", m_total_samples);
|
||||
}
|
||||
|
||||
// Log to stdout:
|
||||
@@ -127,7 +135,7 @@ void measure_cold_base::generate_summaries()
|
||||
avg_cuda_time * 1e3,
|
||||
avg_cpu_time * 1e3,
|
||||
m_total_cuda_time,
|
||||
m_total_iters);
|
||||
m_total_samples);
|
||||
if (m_max_time_exceeded)
|
||||
{
|
||||
if (m_cuda_noise > m_max_noise)
|
||||
@@ -137,12 +145,12 @@ void measure_cold_base::generate_summaries()
|
||||
m_cuda_noise,
|
||||
m_max_noise);
|
||||
}
|
||||
if (m_total_iters < m_min_iters)
|
||||
if (m_total_samples < m_min_samples)
|
||||
{
|
||||
fmt::print("!!!! Previous benchmark exceeded max time before "
|
||||
"accumulating min samples ({} < {})\n",
|
||||
m_total_iters,
|
||||
m_min_iters);
|
||||
m_total_samples,
|
||||
m_min_samples);
|
||||
}
|
||||
if (m_total_cuda_time < m_min_time)
|
||||
{
|
||||
|
||||
@@ -26,9 +26,7 @@ namespace detail
|
||||
// non-templated code goes here:
|
||||
struct measure_cold_base
|
||||
{
|
||||
explicit measure_cold_base(nvbench::state &exec_state)
|
||||
: m_state(exec_state)
|
||||
{}
|
||||
explicit measure_cold_base(nvbench::state &exec_state);
|
||||
measure_cold_base(const measure_cold_base &) = delete;
|
||||
measure_cold_base(measure_cold_base &&) = delete;
|
||||
measure_cold_base &operator=(const measure_cold_base &) = delete;
|
||||
@@ -43,7 +41,7 @@ protected:
|
||||
m_total_cpu_time = 0.;
|
||||
m_cuda_noise = 0.;
|
||||
m_cpu_noise = 0.;
|
||||
m_total_iters = 0;
|
||||
m_total_samples = 0;
|
||||
m_cuda_times.clear();
|
||||
m_cpu_times.clear();
|
||||
m_max_time_exceeded = false;
|
||||
@@ -59,18 +57,16 @@ protected:
|
||||
nvbench::cpu_timer m_timeout_timer;
|
||||
nvbench::detail::l2flush m_l2flush;
|
||||
|
||||
nvbench::int64_t m_min_iters{10};
|
||||
nvbench::int64_t m_total_iters{};
|
||||
|
||||
nvbench::float64_t m_max_noise{0.5}; // % rel stdev
|
||||
nvbench::float64_t m_cuda_noise{}; // % rel stdev
|
||||
nvbench::float64_t m_cpu_noise{}; // % rel stdev
|
||||
|
||||
nvbench::float64_t m_min_time{0.5};
|
||||
nvbench::float64_t m_max_time{5.0};
|
||||
nvbench::int64_t m_min_samples{};
|
||||
nvbench::float64_t m_max_noise{}; // % rel stdev
|
||||
nvbench::float64_t m_min_time{};
|
||||
nvbench::float64_t m_timeout{};
|
||||
|
||||
nvbench::int64_t m_total_samples{};
|
||||
nvbench::float64_t m_total_cuda_time{};
|
||||
nvbench::float64_t m_total_cpu_time{};
|
||||
nvbench::float64_t m_cuda_noise{}; // % rel stdev
|
||||
nvbench::float64_t m_cpu_noise{}; // % rel stdev
|
||||
|
||||
std::vector<nvbench::float64_t> m_cuda_times;
|
||||
std::vector<nvbench::float64_t> m_cpu_times;
|
||||
@@ -128,7 +124,7 @@ private:
|
||||
m_cpu_times.push_back(cur_cpu_time);
|
||||
m_total_cuda_time += cur_cuda_time;
|
||||
m_total_cpu_time += cur_cpu_time;
|
||||
++m_total_iters;
|
||||
++m_total_samples;
|
||||
|
||||
// Only consider the cuda noise in the convergence criteria.
|
||||
m_cuda_noise = nvbench::detail::compute_noise(m_cuda_times,
|
||||
@@ -137,14 +133,14 @@ private:
|
||||
m_timeout_timer.stop();
|
||||
const auto total_time = m_timeout_timer.get_duration();
|
||||
|
||||
if (m_total_cuda_time > m_min_time && // Min time okay
|
||||
m_total_iters > m_min_iters && // Min iters okay
|
||||
m_cuda_noise < m_max_noise) // Noise okay
|
||||
if (m_total_cuda_time > m_min_time && // Min time okay
|
||||
m_total_samples > m_min_samples && // Min samples okay
|
||||
m_cuda_noise < m_max_noise) // Noise okay
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (total_time > m_max_time) // Max time exceeded, stop iterating.
|
||||
if (total_time > m_timeout) // Max time exceeded, stop iterating.
|
||||
{
|
||||
m_max_time_exceeded = true;
|
||||
break;
|
||||
|
||||
@@ -36,35 +36,35 @@ void measure_hot_base::check()
|
||||
}
|
||||
|
||||
measure_hot_base::measure_hot_base(state &exec_state)
|
||||
: m_state(exec_state)
|
||||
: m_state{exec_state}
|
||||
, m_min_samples{exec_state.get_min_samples()}
|
||||
, m_min_time{exec_state.get_min_time()}
|
||||
, m_timeout{exec_state.get_timeout()}
|
||||
{
|
||||
// Since cold measures converge to a stable result, increase the min_iters
|
||||
// Since cold measures converge to a stable result, increase the min_samples
|
||||
// to match the cold result if available.
|
||||
try
|
||||
{
|
||||
nvbench::int64_t cold_iters =
|
||||
m_state.get_summary("Number of Trials (Cold)").get_int64("value");
|
||||
m_min_iters = std::max(m_min_iters, cold_iters);
|
||||
nvbench::int64_t cold_samples =
|
||||
m_state.get_summary("Number of Samples (Cold)").get_int64("value");
|
||||
m_min_samples = std::max(m_min_samples, cold_samples);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
// TODO Need state API
|
||||
// m_min_iters = state.get_min_trials();
|
||||
//
|
||||
|
||||
// Apply the target_time since we don't have noise convergence estimates
|
||||
// from the cold executions:
|
||||
// TODO Need state API. Replace the following line with the commented one
|
||||
const auto target_time = (m_min_time + m_max_time) / 2.;
|
||||
// const auto target_time = state.get_target_time();
|
||||
m_min_time = std::max(m_min_time, target_time);
|
||||
// If the above threw an exception, we don't have a cold measurement to use.
|
||||
// Estimate a target_time between m_min_time and m_timeout.
|
||||
// Use the average of the min_time and timeout, but don't go over 5x
|
||||
// min_time in case timeout is huge.
|
||||
// We could expose a `target_time` property on benchmark_base/state if
|
||||
// needed.
|
||||
m_min_time = std::min((m_min_time + m_timeout) / 2., m_min_time * 5);
|
||||
}
|
||||
}
|
||||
|
||||
void measure_hot_base::generate_summaries()
|
||||
{
|
||||
const auto d_iters = static_cast<double>(m_total_iters);
|
||||
const auto avg_cuda_time = m_total_cuda_time / d_iters;
|
||||
const auto d_samples = static_cast<double>(m_total_samples);
|
||||
const auto avg_cuda_time = m_total_cuda_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average GPU Time (Hot)");
|
||||
summ.set_string("hint", "duration");
|
||||
@@ -75,7 +75,7 @@ void measure_hot_base::generate_summaries()
|
||||
summ.set_float64("value", avg_cuda_time);
|
||||
}
|
||||
|
||||
const auto avg_cpu_time = m_total_cpu_time / d_iters;
|
||||
const auto avg_cpu_time = m_total_cpu_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average CPU Time (Hot)");
|
||||
summ.set_string("hide",
|
||||
@@ -89,11 +89,11 @@ void measure_hot_base::generate_summaries()
|
||||
}
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("Number of Trials (Hot)");
|
||||
summ.set_string("short_name", "Trials");
|
||||
auto &summ = m_state.add_summary("Number of Samples (Hot)");
|
||||
summ.set_string("short_name", "Samples");
|
||||
summ.set_string("description",
|
||||
"Number of kernel executions in hot time measurements.");
|
||||
summ.set_int64("value", m_total_iters);
|
||||
summ.set_int64("value", m_total_samples);
|
||||
}
|
||||
|
||||
if (const auto items = m_state.get_items_processed_per_launch(); items != 0)
|
||||
@@ -171,15 +171,15 @@ void measure_hot_base::generate_summaries()
|
||||
avg_cuda_time * 1e3,
|
||||
avg_cpu_time * 1e3,
|
||||
m_total_cuda_time,
|
||||
m_total_iters);
|
||||
m_total_samples);
|
||||
if (m_max_time_exceeded)
|
||||
{
|
||||
if (m_total_iters < m_min_iters)
|
||||
if (m_total_samples < m_min_samples)
|
||||
{
|
||||
fmt::print("!!!! Previous benchmark exceeded max time before "
|
||||
"accumulating min samples ({} < {})\n",
|
||||
m_total_iters,
|
||||
m_min_iters);
|
||||
m_total_samples,
|
||||
m_min_samples);
|
||||
}
|
||||
if (m_total_cuda_time < m_min_time)
|
||||
{
|
||||
|
||||
@@ -33,7 +33,7 @@ protected:
|
||||
{
|
||||
m_total_cpu_time = 0.;
|
||||
m_total_cuda_time = 0.;
|
||||
m_total_iters = 0;
|
||||
m_total_samples = 0;
|
||||
m_max_time_exceeded = false;
|
||||
}
|
||||
|
||||
@@ -46,12 +46,11 @@ protected:
|
||||
nvbench::cpu_timer m_cpu_timer;
|
||||
nvbench::cpu_timer m_timeout_timer;
|
||||
|
||||
nvbench::int64_t m_total_iters{};
|
||||
nvbench::int64_t m_min_iters{10};
|
||||
|
||||
nvbench::float64_t m_min_time{0.5};
|
||||
nvbench::float64_t m_max_time{5.0};
|
||||
nvbench::int64_t m_min_samples{};
|
||||
nvbench::float64_t m_min_time{};
|
||||
nvbench::float64_t m_timeout{};
|
||||
|
||||
nvbench::int64_t m_total_samples{};
|
||||
nvbench::float64_t m_total_cuda_time{};
|
||||
nvbench::float64_t m_total_cpu_time{};
|
||||
|
||||
@@ -102,7 +101,7 @@ private:
|
||||
// Block stream until some work is queued.
|
||||
// Limit the number of kernel executions while blocked to prevent
|
||||
// deadlocks. See warnings on blocking_kernel.
|
||||
const auto blocked_launches = std::min(batch_size, nvbench::int64_t{2});
|
||||
const auto blocked_launches = std::min(batch_size, nvbench::int64_t{2});
|
||||
const auto unblocked_launches = batch_size - blocked_launches;
|
||||
|
||||
blocker.block(m_launch.get_stream());
|
||||
@@ -129,22 +128,22 @@ private:
|
||||
|
||||
m_total_cpu_time += m_cpu_timer.get_duration();
|
||||
m_total_cuda_time += m_cuda_timer.get_duration();
|
||||
m_total_iters += batch_size;
|
||||
m_total_samples += batch_size;
|
||||
|
||||
// Predict number of remaining iterations:
|
||||
batch_size = (m_min_time - m_total_cuda_time) /
|
||||
(m_total_cuda_time / m_total_iters);
|
||||
(m_total_cuda_time / m_total_samples);
|
||||
|
||||
m_timeout_timer.stop();
|
||||
const auto total_time = m_timeout_timer.get_duration();
|
||||
|
||||
if (m_total_cuda_time > m_min_time && // min time okay
|
||||
m_total_iters > m_min_iters) // min iters okay
|
||||
m_total_samples > m_min_samples) // min samples okay
|
||||
{
|
||||
break; // Stop iterating
|
||||
}
|
||||
|
||||
if (m_total_cuda_time > m_max_time)
|
||||
if (m_total_cuda_time > m_timeout)
|
||||
{
|
||||
m_max_time_exceeded = true;
|
||||
break;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include <nvbench/state.cuh>
|
||||
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
@@ -11,6 +12,30 @@
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
state::state(const benchmark_base &bench)
|
||||
: m_benchmark{bench}
|
||||
, m_min_samples{bench.get_min_samples()}
|
||||
, m_min_time{bench.get_min_time()}
|
||||
, m_max_noise{bench.get_max_noise()}
|
||||
, m_skip_time{bench.get_skip_time()}
|
||||
, m_timeout{bench.get_timeout()}
|
||||
{}
|
||||
|
||||
state::state(const benchmark_base &bench,
|
||||
nvbench::named_values values,
|
||||
std::optional<nvbench::device_info> device,
|
||||
std::size_t type_config_index)
|
||||
: m_benchmark{bench}
|
||||
, m_axis_values{std::move(values)}
|
||||
, m_device{std::move(device)}
|
||||
, m_type_config_index{type_config_index}
|
||||
, m_min_samples{bench.get_min_samples()}
|
||||
, m_min_time{bench.get_min_time()}
|
||||
, m_max_noise{bench.get_max_noise()}
|
||||
, m_skip_time{bench.get_skip_time()}
|
||||
, m_timeout{bench.get_timeout()}
|
||||
{}
|
||||
|
||||
nvbench::int64_t state::get_int64(const std::string &axis_name) const
|
||||
{
|
||||
return m_axis_values.get_int64(axis_name);
|
||||
|
||||
@@ -90,6 +90,50 @@ struct state
|
||||
return m_skip_reason;
|
||||
}
|
||||
|
||||
/// Execute at least this many samples per measurement. @{
|
||||
[[nodiscard]] nvbench::int64_t get_min_samples() const
|
||||
{
|
||||
return m_min_samples;
|
||||
}
|
||||
void set_min_samples(nvbench::int64_t min_samples)
|
||||
{
|
||||
m_min_samples = min_samples;
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// Accumulate at least this many seconds of timing data per measurement. @{
|
||||
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
|
||||
void set_min_time(nvbench::float64_t min_time) { m_min_time = min_time; }
|
||||
/// @}
|
||||
|
||||
/// Specify the maximum amount of noise if a measurement supports noise.
|
||||
/// Noise is the relative standard deviation expressed as a percentage:
|
||||
/// `noise = 100 * (stdev / mean_time)`. @{
|
||||
[[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; }
|
||||
void set_max_noise(nvbench::float64_t max_noise) { m_max_noise = max_noise; }
|
||||
/// @}
|
||||
|
||||
/// If a warmup run finishes in less than `skip_time`, the measurement will
|
||||
/// be skipped.
|
||||
/// Extremely fast kernels (< 5000 ns) often time out before they can
|
||||
/// accumulate `min_time` measurements, and are often uninteresting. Setting
|
||||
/// this value can help improve performance by skipping time consuming
|
||||
/// measurements that don't provide much information.
|
||||
/// Default value is 0, which disables the feature.
|
||||
/// @{
|
||||
[[nodiscard]] nvbench::float64_t get_skip_time() const { return m_skip_time; }
|
||||
void set_skip_time(nvbench::float64_t skip_time) { m_skip_time = skip_time; }
|
||||
/// @}
|
||||
|
||||
/// If a measurement takes more than `timeout` seconds to complete, stop the
|
||||
/// measurement early. A warning should be printed if this happens.
|
||||
/// This setting overrides all other termination criteria.
|
||||
/// Note that this is measured in CPU walltime, not sample time.
|
||||
/// @{
|
||||
[[nodiscard]] nvbench::float64_t get_timeout() const { return m_timeout; }
|
||||
void set_timeout(nvbench::float64_t timeout) { m_timeout = timeout; }
|
||||
/// @}
|
||||
|
||||
[[nodiscard]] const named_values &get_axis_values() const
|
||||
{
|
||||
return m_axis_values;
|
||||
@@ -111,25 +155,25 @@ private:
|
||||
friend struct nvbench::detail::state_generator;
|
||||
friend struct nvbench::detail::state_tester;
|
||||
|
||||
explicit state(const benchmark_base &bench)
|
||||
: m_benchmark{bench}
|
||||
{}
|
||||
explicit state(const benchmark_base &bench);
|
||||
|
||||
state(const benchmark_base &bench,
|
||||
nvbench::named_values values,
|
||||
std::optional<nvbench::device_info> device,
|
||||
std::size_t type_config_index)
|
||||
: m_benchmark{bench}
|
||||
, m_axis_values{std::move(values)}
|
||||
, m_device{std::move(device)}
|
||||
, m_type_config_index{type_config_index}
|
||||
{}
|
||||
std::size_t type_config_index);
|
||||
|
||||
std::reference_wrapper<const nvbench::benchmark_base> m_benchmark;
|
||||
nvbench::named_values m_axis_values;
|
||||
std::optional<nvbench::device_info> m_device;
|
||||
std::size_t m_type_config_index{};
|
||||
|
||||
nvbench::int64_t m_min_samples;
|
||||
nvbench::float64_t m_min_time;
|
||||
nvbench::float64_t m_max_noise;
|
||||
|
||||
nvbench::float64_t m_skip_time;
|
||||
nvbench::float64_t m_timeout;
|
||||
|
||||
std::vector<nvbench::summary> m_summaries;
|
||||
std::string m_skip_reason;
|
||||
nvbench::int64_t m_items_processed_per_launch{};
|
||||
|
||||
@@ -717,16 +717,10 @@ void test_devices()
|
||||
ASSERT(states.size() == 12);
|
||||
|
||||
fmt::memory_buffer buffer;
|
||||
const std::string table_format =
|
||||
"| {:^5} | {:^6} | {:^5} | {:^3} |\n";
|
||||
const std::string table_format = "| {:^5} | {:^6} | {:^5} | {:^3} |\n";
|
||||
|
||||
fmt::format_to(buffer, "\n");
|
||||
fmt::format_to(buffer,
|
||||
table_format,
|
||||
"State",
|
||||
"Device",
|
||||
"S",
|
||||
"I");
|
||||
fmt::format_to(buffer, table_format, "State", "Device", "S", "I");
|
||||
|
||||
std::size_t config = 0;
|
||||
for (const auto &state : states)
|
||||
@@ -760,6 +754,36 @@ void test_devices()
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
|
||||
void test_termination_criteria()
|
||||
{
|
||||
const nvbench::int64_t min_samples = 1000;
|
||||
const nvbench::float64_t min_time = 2000;
|
||||
const nvbench::float64_t max_noise = 3000;
|
||||
const nvbench::float64_t skip_time = 4000;
|
||||
const nvbench::float64_t timeout = 5000;
|
||||
|
||||
// for comparing floats
|
||||
auto within_one = [](auto a, auto b) { return std::abs(a - b) < 1.; };
|
||||
|
||||
dummy_bench bench;
|
||||
bench.set_devices(std::vector<int>{});
|
||||
bench.set_min_samples(min_samples);
|
||||
bench.set_min_time(min_time);
|
||||
bench.set_max_noise(max_noise);
|
||||
bench.set_skip_time(skip_time);
|
||||
bench.set_timeout(timeout);
|
||||
|
||||
const std::vector<nvbench::state> states =
|
||||
nvbench::detail::state_generator::create(bench);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(min_samples == states[0].get_min_samples());
|
||||
ASSERT(within_one(min_time, states[0].get_min_time()));
|
||||
ASSERT(within_one(max_noise, states[0].get_max_noise()));
|
||||
ASSERT(within_one(skip_time, states[0].get_skip_time()));
|
||||
ASSERT(within_one(timeout, states[0].get_timeout()));
|
||||
}
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
@@ -770,9 +794,11 @@ try
|
||||
test_create_with_types();
|
||||
test_create_with_masked_types();
|
||||
test_devices();
|
||||
test_termination_criteria();
|
||||
|
||||
return 0;
|
||||
}
|
||||
catch (std::exception& e)
|
||||
catch (std::exception &e)
|
||||
{
|
||||
fmt::print("{}\n", e.what());
|
||||
return 1;
|
||||
|
||||
Reference in New Issue
Block a user