Merge pull request #151 from gevtushenko/entropy

Entropy-based stopping criterion
This commit is contained in:
Georgii Evtushenko
2024-01-12 23:19:25 +04:00
committed by GitHub
32 changed files with 1867 additions and 121 deletions

63
.clangd Normal file
View File

@@ -0,0 +1,63 @@
# https://clangd.llvm.org/config
# Apply a config conditionally to all C files
If:
PathMatch: .*\.(c|h)$
---
# Apply a config conditionally to all C++ files
If:
PathMatch: .*\.(c|h)pp
---
# Apply a config conditionally to all CUDA files
If:
PathMatch: .*\.cuh?
CompileFlags:
Add:
# Allow variadic CUDA functions
- "-Xclang=-fcuda-allow-variadic-functions"
---
# Tweak the clangd parse settings for all files
CompileFlags:
Compiler: clang++
CompilationDatabase: .
Add:
- -x
- cuda
# report all errors
- "-ferror-limit=0"
- "-ftemplate-backtrace-limit=0"
- "-stdlib=libc++"
- "-std=c++17"
Remove:
# strip CUDA fatbin args
- "-Xfatbin*"
- "-Xcompiler*"
- "-Xcudafe*"
- "-rdc=*"
- "-gpu=*"
- "--diag_suppress*"
# strip CUDA arch flags
- "-gencode*"
- "--generate-code*"
# strip gcc's -fcoroutines
- -fcoroutines
# strip CUDA flags unknown to clang
- "-ccbin*"
- "--compiler-options*"
- "--expt-extended-lambda"
- "--expt-relaxed-constexpr"
- "-forward-unknown-to-host-compiler"
- "-Werror=cross-execution-space-call"
Diagnostics:
Suppress:
- "variadic_device_fn"
- "attributes_not_allowed"
# The NVHPC version of _NVCXX_EXPAND_PACK macro triggers this clang error.
# Temporarily suppressing it, but should probably fix
- "template_param_shadow"

View File

@@ -89,8 +89,15 @@
* Applies to the most recent `--benchmark`, or all benchmarks if specified
before any `--benchmark` arguments.
* `--stopping-criterion <criterion>`
* After `--min-samples` is satisfied, use `<criterion>` to detect if enough
samples were collected.
* Only applies to Cold measurements.
* Default is stdrel (`--stopping-criterion stdrel`)
* `--min-time <seconds>`
* Accumulate at least `<seconds>` of execution time per measurement.
* Only applies to `stdrel` stopping criterion.
* Default is 0.5 seconds.
* If both GPU and CPU times are gathered, this applies to GPU time only.
* Applies to the most recent `--benchmark`, or all benchmarks if specified
@@ -100,6 +107,7 @@
* Gather samples until the error in the measurement drops below `<value>`.
* Noise is specified as the percent relative standard deviation.
* Default is 0.5% (`--max-noise 0.5`)
* Only applies to `stdrel` stopping criterion.
* Only applies to Cold measurements.
* If both GPU and CPU times are gathered, this applies to GPU noise only.
* Applies to the most recent `--benchmark`, or all benchmarks if specified

View File

@@ -1,12 +1,13 @@
set(example_srcs
auto_throughput.cu
axes.cu
custom_criterion.cu
enums.cu
exec_tag_sync.cu
exec_tag_timer.cu
skip.cu
stream.cu
throughput.cu
auto_throughput.cu
)
# Metatarget for all examples:

View File

@@ -0,0 +1,80 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/nvbench.cuh>
// Grab some testing kernels from NVBench:
#include <nvbench/test_kernels.cuh>
// Thrust vectors simplify memory management:
#include <thrust/device_vector.h>
// A minimal custom stopping criterion. Derive from
// `nvbench::stopping_criterion_base` and override the three `do_*` hooks.
class fixed_criterion final : public nvbench::stopping_criterion_base
{
  // Number of measurements received since the last `do_initialize()` call.
  nvbench::int64_t m_num_samples{};

public:
  // The criterion is named "fixed" and declares a single int64 parameter,
  // "max-samples", initialized to 42.
  fixed_criterion()
      : nvbench::stopping_criterion_base{"fixed", {{"max-samples", nvbench::int64_t{42}}}}
  {}

protected:
  // `do_initialize()` resets the criterion's state:
  void do_initialize() override { m_num_samples = 0; }

  // `do_add_measurement()` processes each new measurement. The measured value
  // itself is ignored here; only the number of measurements matters:
  void do_add_measurement(nvbench::float64_t /* measurement */) override { ++m_num_samples; }

  // `do_is_finished()` reports whether the stopping criterion is met, i.e.
  // whether at least "max-samples" measurements have been counted:
  bool do_is_finished() override
  {
    const nvbench::int64_t sample_limit = m_params.get_int64("max-samples");
    return m_num_samples >= sample_limit;
  }
};
// Register the criterion with NVBench. Once registered, benchmarks can select
// it by the name passed to the base class constructor ("fixed"):
NVBENCH_REGISTER_CRITERION(fixed_criterion);
// Benchmark body: copies a 64 MiB int32 buffer with NVBench's `copy_kernel`.
void throughput_bench(nvbench::state &state)
{
  // Allocate input data (64 MiB worth of int32 elements per buffer):
  constexpr std::size_t buffer_bytes = 64 * 1024 * 1024;
  const std::size_t num_values       = buffer_bytes / sizeof(nvbench::int32_t);
  thrust::device_vector<nvbench::int32_t> input(num_values);
  thrust::device_vector<nvbench::int32_t> output(num_values);

  // Provide throughput information:
  state.add_element_count(num_values, "NumElements");
  state.add_global_memory_reads<nvbench::int32_t>(num_values, "DataSize");
  state.add_global_memory_writes<nvbench::int32_t>(num_values);

  // The vectors are never resized, so the raw device pointers can be obtained
  // once and captured by value:
  nvbench::int32_t *const in_ptr  = thrust::raw_pointer_cast(input.data());
  nvbench::int32_t *const out_ptr = thrust::raw_pointer_cast(output.data());

  // Launch on the stream supplied by NVBench:
  state.exec(nvbench::exec_tag::no_batch, [in_ptr, out_ptr, num_values](nvbench::launch &launch) {
    nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values);
  });
}
// Define the benchmark and select the custom criterion by its name ("fixed"):
NVBENCH_BENCH(throughput_bench).set_stopping_criterion("fixed");

View File

@@ -4,6 +4,7 @@ set(srcs
benchmark_base.cxx
benchmark_manager.cxx
blocking_kernel.cu
criterion_manager.cxx
csv_printer.cu
cuda_call.cu
device_info.cu
@@ -17,13 +18,16 @@ set(srcs
printer_multiplex.cxx
runner.cxx
state.cxx
stopping_criterion.cxx
string_axis.cxx
type_axis.cxx
type_strings.cxx
detail/entropy_criterion.cxx
detail/measure_cold.cu
detail/measure_hot.cu
detail/state_generator.cxx
detail/stdrel_criterion.cxx
internal/nvml.cxx
)

View File

@@ -22,6 +22,7 @@
#include <nvbench/device_info.cuh>
#include <nvbench/device_manager.cuh>
#include <nvbench/state.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <functional> // reference_wrapper, ref
#include <memory>
@@ -181,22 +182,30 @@ struct benchmark_base
}
/// @}
/// Accumulate at least this many seconds of timing data per measurement. @{
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
/// Accumulate at least this many seconds of timing data per measurement.
/// Only applies to `stdrel` stopping criterion. @{
[[nodiscard]] nvbench::float64_t get_min_time() const
{
return m_criterion_params.get_float64("min-time");
}
benchmark_base &set_min_time(nvbench::float64_t min_time)
{
m_min_time = min_time;
m_criterion_params.set_float64("min-time", min_time);
return *this;
}
/// @}
/// Specify the maximum amount of noise if a measurement supports noise.
/// Noise is the relative standard deviation:
/// `noise = stdev / mean_time`. @{
[[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; }
/// `noise = stdev / mean_time`.
/// Only applies to `stdrel` stopping criterion. @{
[[nodiscard]] nvbench::float64_t get_max_noise() const
{
return m_criterion_params.get_float64("max-noise");
}
benchmark_base &set_max_noise(nvbench::float64_t max_noise)
{
m_max_noise = max_noise;
m_criterion_params.set_float64("max-noise", max_noise);
return *this;
}
/// @}
@@ -230,6 +239,19 @@ struct benchmark_base
}
/// @}
/// Access the stopping-criterion parameters held in `m_criterion_params`
/// (mutable and const overloads). The `min-time` and `max-noise` settings are
/// stored in this parameter set.
[[nodiscard]] nvbench::criterion_params& get_criterion_params() { return m_criterion_params; }
[[nodiscard]] const nvbench::criterion_params& get_criterion_params() const { return m_criterion_params; }
/// Control the stopping criterion for the measurement loop.
/// The criterion is stored as a name string; the setter takes the string by
/// value, moves it into place, and returns `*this` to allow call chaining.
/// @{
[[nodiscard]] const std::string& get_stopping_criterion() const { return m_stopping_criterion; }
benchmark_base &set_stopping_criterion(std::string criterion)
{
m_stopping_criterion = std::move(criterion);
return *this;
}
/// @}
protected:
friend struct nvbench::runner_base;
@@ -247,12 +269,13 @@ protected:
bool m_disable_blocking_kernel{false};
nvbench::int64_t m_min_samples{10};
nvbench::float64_t m_min_time{0.5};
nvbench::float64_t m_max_noise{0.005}; // 0.5% relative standard deviation
nvbench::float64_t m_skip_time{-1.};
nvbench::float64_t m_timeout{15.};
nvbench::criterion_params m_criterion_params;
std::string m_stopping_criterion{"stdrel"};
private:
// route these through virtuals so the templated subclass can inject type info
virtual std::unique_ptr<benchmark_base> do_clone() const = 0;

View File

@@ -34,13 +34,14 @@ std::unique_ptr<benchmark_base> benchmark_base::clone() const
result->m_axes = m_axes;
result->m_devices = m_devices;
result->m_min_samples = m_min_samples;
result->m_min_time = m_min_time;
result->m_max_noise = m_max_noise;
result->m_min_samples = m_min_samples;
result->m_criterion_params = m_criterion_params;
result->m_skip_time = m_skip_time;
result->m_timeout = m_timeout;
result->m_stopping_criterion = m_stopping_criterion;
return result;
}

View File

@@ -0,0 +1,65 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <nvbench/detail/entropy_criterion.cuh>
#include <nvbench/detail/stdrel_criterion.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/types.cuh>
#include <memory>
#include <unordered_map>
namespace nvbench
{
/**
 * Registry of stopping criteria, stored by name and accessed through a
 * singleton instance.
 */
class criterion_manager
{
// Owning map from a name string to the criterion instance.
std::unordered_map<std::string, std::unique_ptr<nvbench::stopping_criterion_base>> m_map;
// Private constructor: instances are obtained through `get()`.
criterion_manager();
public:
/**
* @return The singleton criterion_manager instance.
*/
static criterion_manager& get();
/**
* Register a new stopping criterion.
*
* Ownership of `criterion` is transferred to the manager.
*
* @return A reference to a criterion owned by the manager.
*/
nvbench::stopping_criterion_base& add(std::unique_ptr<nvbench::stopping_criterion_base> criterion);
/**
* Look up a stopping criterion by `name` (mutable and const overloads).
*/
nvbench::stopping_criterion_base& get_criterion(const std::string& name);
const nvbench::stopping_criterion_base& get_criterion(const std::string& name) const;
/// A list of (parameter name, parameter type) pairs.
using params_description = std::vector<std::pair<std::string, nvbench::named_values::type>>;
/// @return A description (name and type) of criterion parameters.
params_description get_params_description() const;
};
/**
* Given a stopping criterion type `TYPE`, registers it in the criterion manager.
*
* The macro expands to the definition of a static reference that is
* initialized with the result of
* `nvbench::criterion_manager::get().add(std::make_unique<TYPE>())`, so `TYPE`
* must be constructible without arguments. Because the expansion is a
* definition without a trailing semicolon, the invocation must be followed by
* one.
*
* See the `custom_criterion.cu` example for usage.
*/
#define NVBENCH_REGISTER_CRITERION(TYPE) \
static nvbench::stopping_criterion_base &NVBENCH_UNIQUE_IDENTIFIER(TYPE) = \
nvbench::criterion_manager::get().add(std::make_unique<TYPE>())
} // namespace nvbench

View File

@@ -0,0 +1,100 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/criterion_manager.cuh>
#include <nvbench/detail/throw.cuh>
namespace nvbench
{
// Populates the registry with the two built-in criteria, stored under the
// names "stdrel" and "entropy".
criterion_manager::criterion_manager()
{
  auto stdrel_criterion = std::make_unique<nvbench::detail::stdrel_criterion>();
  m_map.emplace("stdrel", std::move(stdrel_criterion));

  auto entropy_criterion = std::make_unique<nvbench::detail::entropy_criterion>();
  m_map.emplace("entropy", std::move(entropy_criterion));
}
// Returns the process-wide manager. The instance is a function-local static,
// so it is constructed on the first call.
criterion_manager &criterion_manager::get()
{
  static criterion_manager instance;
  return instance;
}
// Looks up the criterion stored under `name`.
// Throws std::runtime_error (via NVBENCH_THROW) if no such entry exists.
stopping_criterion_base &criterion_manager::get_criterion(const std::string &name)
{
  const auto entry     = m_map.find(name);
  const bool is_missing = (entry == m_map.end());
  if (is_missing)
  {
    NVBENCH_THROW(std::runtime_error, "No stopping criterion named \"{}\".", name);
  }

  return *entry->second;
}
// Const overload: looks up the criterion stored under `name`.
// Throws std::runtime_error (via NVBENCH_THROW) if no such entry exists.
const nvbench::stopping_criterion_base &
criterion_manager::get_criterion(const std::string &name) const
{
  const auto position = m_map.find(name);
  if (m_map.end() == position)
  {
    NVBENCH_THROW(std::runtime_error, "No stopping criterion named \"{}\".", name);
  }

  const nvbench::stopping_criterion_base &criterion = *position->second;
  return criterion;
}
// Registers `criterion` under the name it reports via `get_name()` and takes
// ownership of it.
//
// Returns a reference to the stored criterion.
// Throws std::runtime_error (via NVBENCH_THROW) if `criterion` is null or if
// an entry with the same name already exists.
stopping_criterion_base &criterion_manager::add(std::unique_ptr<stopping_criterion_base> criterion)
{
  // Guard against dereferencing an empty pointer in `get_name()` below.
  if (criterion == nullptr)
  {
    NVBENCH_THROW(std::runtime_error, "{}", "Cannot register a null stopping criterion.");
  }

  // Copy the name before the pointer is moved into the map; the copy is also
  // needed for the error message.
  const std::string name = criterion->get_name();

  auto [it, success] = m_map.emplace(name, std::move(criterion));

  if (!success)
  {
    NVBENCH_THROW(std::runtime_error,
                  "Stopping criterion \"{}\" is already registered.", name);
  }

  return *it->second.get();
}
// Collects (name, type) pairs for the parameters of every stored criterion.
//
// Throws std::runtime_error (via NVBENCH_THROW) when a parameter name has
// already been collected with a different type. A name that repeats with the
// same type is appended again.
nvbench::criterion_manager::params_description criterion_manager::get_params_description() const
{
  nvbench::criterion_manager::params_description collected;

  for (const auto &[criterion_name, criterion] : m_map)
  {
    nvbench::criterion_params params = criterion->get_params();

    for (const auto &param : params.get_names())
    {
      const nvbench::named_values::type type = params.get_type(param);

      // Same name, different type => conflicting declarations.
      const bool type_conflict =
        std::any_of(collected.cbegin(), collected.cend(), [&](const auto &known) {
          return known.first == param && known.second != type;
        });

      if (type_conflict)
      {
        NVBENCH_THROW(std::runtime_error,
                      "Stopping criterion \"{}\" parameter \"{}\" is already used by another "
                      "criterion with a different type.",
                      criterion_name,
                      param);
      }

      collected.emplace_back(param, type);
    }
  }

  return collected;
}
} // namespace nvbench

View File

@@ -0,0 +1,55 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <nvbench/types.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/ring_buffer.cuh>
#include <vector>
namespace nvbench::detail
{
/**
 * Stopping criterion named "entropy".
 *
 * Keeps a frequency table of the measurements seen so far, records the entropy
 * of that table after every measurement, and decides whether to stop from a
 * linear fit over the recorded entropy values.
 */
class entropy_criterion final : public stopping_criterion_base
{
// state
// Number of measurements added since the last initialization.
nvbench::int64_t m_total_samples{};
// Sum of all measurements added since the last initialization.
nvbench::float64_t m_total_cuda_time{};
// (measurement value, occurrence count) pairs, kept sorted by value.
std::vector<std::pair<nvbench::float64_t, nvbench::int64_t>> m_freq_tracker;
// TODO The window size should be user-configurable
// Recorded entropy values; the ring buffer is constructed with the value 299.
nvbench::detail::ring_buffer<nvbench::float64_t> m_entropy_tracker{299};
// Used to avoid re-allocating temporary memory
std::vector<nvbench::float64_t> m_probabilities;
// Computes the entropy (in bits) of the current frequency table.
nvbench::float64_t compute_entropy();
public:
entropy_criterion();
protected:
// Resets the sample counters and clears the trackers.
virtual void do_initialize() override;
// Updates the frequency table with `measurement` and records the new entropy.
virtual void do_add_measurement(nvbench::float64_t measurement) override;
// Checks the slope and R^2 of a linear fit over the recorded entropy values.
virtual bool do_is_finished() override;
};
} // namespace nvbench::detail

View File

@@ -0,0 +1,137 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/detail/entropy_criterion.cuh>
#include <nvbench/types.cuh>
#include <cmath>
namespace nvbench::detail
{
// Declares the criterion as "entropy" with two float parameters, "max-angle"
// (0.048) and "min-r2" (0.36), and pre-allocates the scratch vectors to twice
// the capacity of the entropy ring buffer.
entropy_criterion::entropy_criterion()
    : stopping_criterion_base{"entropy", {{"max-angle", 0.048}, {"min-r2", 0.36}}}
{
  const auto scratch_capacity = m_entropy_tracker.capacity() * 2;
  m_freq_tracker.reserve(scratch_capacity);
  m_probabilities.reserve(scratch_capacity);
}
// Resets the criterion: empties both trackers and zeroes the running totals.
void entropy_criterion::do_initialize()
{
  // Drop the accumulated history:
  m_freq_tracker.clear();
  m_entropy_tracker.clear();

  // Reset the running totals:
  m_total_cuda_time = 0.0;
  m_total_samples   = 0;
}
// Computes the entropy, in bits, of the current frequency table:
//   H = -sum_i p_i * log2(p_i),  with  p_i = count_i / m_total_samples.
// Returns 0 when the table is empty. The probabilities are also stored in
// `m_probabilities`.
nvbench::float64_t entropy_criterion::compute_entropy()
{
  const std::size_t num_bins = m_freq_tracker.size();
  if (num_bins == 0)
  {
    return 0.0;
  }

  m_probabilities.resize(num_bins);

  const auto total = static_cast<nvbench::float64_t>(m_total_samples);

  nvbench::float64_t entropy{};
  for (std::size_t bin = 0; bin < num_bins; ++bin)
  {
    const auto count           = static_cast<nvbench::float64_t>(m_freq_tracker[bin].second);
    const nvbench::float64_t p = count / total;
    m_probabilities[bin]       = p;
    entropy -= p * std::log2(p);
  }

  return entropy;
}
// Accounts for one new measurement: updates the running totals, bumps (or
// inserts) the measurement's entry in the sorted frequency table, and records
// the resulting entropy.
void entropy_criterion::do_add_measurement(nvbench::float64_t measurement)
{
  ++m_total_samples;
  m_total_cuda_time += measurement;

  nvbench::float64_t key = measurement;

  // Optional rounding of keys to a fixed grid; currently disabled.
  constexpr bool bin_keys = false;
  if (bin_keys)
  {
    const auto resolution_us = 0.5;
    const auto resolution_s  = resolution_us / 1'000'000;
    const auto bin_width     = resolution_s * 2;
    key                      = std::round(key / bin_width) * bin_width;
  }

  // This approach is about 3x faster than `std::{unordered_,}map`
  // Up to 100k samples, only about 20% slower than corresponding stdrel method
  //
  // The table is kept sorted, so a binary search locates either the existing
  // entry or the position where a new one belongs.
  const auto probe = std::make_pair(key, nvbench::int64_t{});
  const auto slot  = std::lower_bound(m_freq_tracker.begin(), m_freq_tracker.end(), probe);

  const bool already_tracked = (slot != m_freq_tracker.end()) && (slot->first == key);
  if (already_tracked)
  {
    ++slot->second;
  }
  else
  {
    m_freq_tracker.emplace(slot, key, nvbench::int64_t{1});
  }

  m_entropy_tracker.push_back(compute_entropy());
}
// Decides whether enough samples were collected.
//
// Fits a line through the recorded entropy values and reports completion when
// the line's angle (in degrees) does not exceed "max-angle" and the fit's R^2
// is not below "min-r2".
bool entropy_criterion::do_is_finished()
{
  // A line fit needs at least two recorded entropy values.
  const bool too_few_entries = m_entropy_tracker.size() < 2;

  // Even number of samples is used to reduce the overhead and not required to compute entropy.
  // This makes `is_finished()` about 20% faster than corresponding stdrel method.
  if (too_few_entries || (m_total_samples % 2 != 0))
  {
    return false;
  }

  const auto first = m_entropy_tracker.cbegin();
  const auto last  = m_entropy_tracker.cend();
  const auto mean  = statistics::compute_mean(first, last);

  const auto fit       = statistics::compute_linear_regression(first, last, mean);
  const auto slope     = fit.first;
  const auto intercept = fit.second;

  // The entropy is still trending too steeply:
  if (statistics::slope2deg(slope) > m_params.get_float64("max-angle"))
  {
    return false;
  }

  // The entropy history must also be described well enough by the line:
  const auto r2 = statistics::compute_r2(first, last, mean, slope, intercept);
  return !(r2 < m_params.get_float64("min-r2"));
}
} // namespace nvbench::detail

View File

@@ -16,38 +16,37 @@
* limitations under the License.
*/
#include <nvbench/detail/measure_cold.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/criterion_manager.cuh>
#include <nvbench/detail/measure_cold.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/printer_base.cuh>
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/detail/ring_buffer.cuh>
#include <nvbench/detail/throw.cuh>
#include <fmt/format.h>
#include <algorithm>
#include <cstdio>
#include <stdexcept>
#include <variant>
namespace nvbench::detail
{
measure_cold_base::measure_cold_base(state &exec_state)
: m_state{exec_state}
, m_launch{m_state.get_cuda_stream()}
, m_criterion_params{exec_state.get_criterion_params()}
, m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(exec_state.get_stopping_criterion())}
, m_run_once{exec_state.get_run_once()}
, m_no_block{exec_state.get_disable_blocking_kernel()}
, m_min_samples{exec_state.get_min_samples()}
, m_max_noise{exec_state.get_max_noise()}
, m_min_time{exec_state.get_min_time()}
, m_skip_time{exec_state.get_skip_time()}
, m_timeout{exec_state.get_timeout()}
{}
{
if (m_min_samples > 0)
{
m_cuda_times.reserve(m_min_samples);
m_cpu_times.reserve(m_min_samples);
}
}
void measure_cold_base::check()
{
@@ -68,10 +67,11 @@ void measure_cold_base::initialize()
m_total_cpu_time = 0.;
m_cpu_noise = 0.;
m_total_samples = 0;
m_noise_tracker.clear();
m_cuda_times.clear();
m_cpu_times.clear();
m_max_time_exceeded = false;
m_stopping_criterion.initialize(m_criterion_params);
}
void measure_cold_base::run_trials_prologue() { m_walltime_timer.start(); }
@@ -87,16 +87,7 @@ void measure_cold_base::record_measurements()
m_total_cpu_time += cur_cpu_time;
++m_total_samples;
// Compute convergence statistics using CUDA timings:
const auto mean_cuda_time = m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples);
const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
m_cuda_times.cend(),
mean_cuda_time);
auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
if (std::isfinite(cuda_rel_stdev))
{
m_noise_tracker.push_back(cuda_rel_stdev);
}
m_stopping_criterion.add_measurement(cur_cuda_time);
}
bool measure_cold_base::is_finished()
@@ -107,39 +98,12 @@ bool measure_cold_base::is_finished()
}
// Check that we've gathered enough samples:
if (m_total_cuda_time > m_min_time && m_total_samples > m_min_samples)
if (m_total_samples > m_min_samples)
{
// Noise has dropped below threshold
if (m_noise_tracker.back() < m_max_noise)
if (m_stopping_criterion.is_finished())
{
return true;
}
// Check if the noise (cuda rel stdev) has converged by inspecting a
// trailing window of recorded noise measurements.
// This helps identify benchmarks that are inherently noisy and would
// never converge to the target stdev threshold. This check ensures that the
// benchmark will end if the stdev stabilizes above the target threshold.
// Gather some iterations before checking noise, and limit how often we
// check this.
if (m_noise_tracker.size() > 64 && (m_total_samples % 16 == 0))
{
// Use the current noise as the stdev reference.
const auto current_noise = m_noise_tracker.back();
const auto noise_stdev =
nvbench::detail::statistics::standard_deviation(m_noise_tracker.cbegin(),
m_noise_tracker.cend(),
current_noise);
const auto noise_rel_stdev = noise_stdev / current_noise;
// If the rel stdev of the last N cuda noise measurements is less than
// 5%, consider the result stable.
const auto noise_threshold = 0.05;
if (noise_rel_stdev < noise_threshold)
{
return true;
}
}
}
// Check for timeouts:
@@ -206,14 +170,21 @@ void measure_cold_base::generate_summaries()
summ.set_float64("value", avg_cuda_time);
}
const auto mean_cuda_time = m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples);
const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
m_cuda_times.cend(),
mean_cuda_time);
const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
const auto noise = cuda_rel_stdev;
const auto max_noise = m_criterion_params.get_float64("max-noise");
const auto min_time = m_criterion_params.get_float64("min-time");
{
auto &summ = m_state.add_summary("nv/cold/time/gpu/stdev/relative");
summ.set_string("name", "Noise");
summ.set_string("hint", "percentage");
summ.set_string("description", "Relative standard deviation of isolated GPU times");
summ.set_float64("value",
m_noise_tracker.empty() ? std::numeric_limits<nvbench::float64_t>::infinity()
: m_noise_tracker.back());
summ.set_float64("value", noise);
}
if (const auto items = m_state.get_element_count(); items != 0)
@@ -270,15 +241,15 @@ void measure_cold_base::generate_summaries()
{
const auto timeout = m_walltime_timer.get_duration();
if (!m_noise_tracker.empty() && m_noise_tracker.back() > m_max_noise)
if (noise > max_noise)
{
printer.log(nvbench::log_level::warn,
fmt::format("Current measurement timed out ({:0.2f}s) "
"while over noise threshold ({:0.2f}% > "
"{:0.2f}%)",
timeout,
m_noise_tracker.back() * 100,
m_max_noise * 100));
noise * 100,
max_noise * 100));
}
if (m_total_samples < m_min_samples)
{
@@ -289,7 +260,7 @@ void measure_cold_base::generate_summaries()
m_total_samples,
m_min_samples));
}
if (m_total_cuda_time < m_min_time)
if (m_total_cuda_time < min_time)
{
printer.log(nvbench::log_level::warn,
fmt::format("Current measurement timed out ({:0.2f}s) "
@@ -297,7 +268,7 @@ void measure_cold_base::generate_summaries()
"{:0.2f}s)",
timeout,
m_total_cuda_time,
m_min_time));
min_time));
}
}

View File

@@ -25,15 +25,14 @@
#include <nvbench/device_info.cuh>
#include <nvbench/exec_tag.cuh>
#include <nvbench/launch.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
#include <nvbench/detail/l2flush.cuh>
#include <nvbench/detail/ring_buffer.cuh>
#include <nvbench/detail/statistics.cuh>
#include <cuda_runtime.h>
#include <algorithm>
#include <utility>
#include <vector>
@@ -87,12 +86,13 @@ protected:
nvbench::detail::l2flush m_l2flush;
nvbench::blocking_kernel m_blocker;
nvbench::criterion_params m_criterion_params;
nvbench::stopping_criterion_base& m_stopping_criterion;
bool m_run_once{false};
bool m_no_block{false};
nvbench::int64_t m_min_samples{};
nvbench::float64_t m_max_noise{}; // rel stdev
nvbench::float64_t m_min_time{};
nvbench::float64_t m_skip_time{};
nvbench::float64_t m_timeout{};
@@ -102,9 +102,6 @@ protected:
nvbench::float64_t m_total_cpu_time{};
nvbench::float64_t m_cpu_noise{}; // rel stdev
// Trailing history of noise measurements for convergence tests
nvbench::detail::ring_buffer<nvbench::float64_t> m_noise_tracker{512};
std::vector<nvbench::float64_t> m_cuda_times;
std::vector<nvbench::float64_t> m_cpu_times;

View File

@@ -22,12 +22,91 @@
#include <nvbench/detail/statistics.cuh>
#include <cstddef>
#include <iterator>
#include <cassert>
#include <vector>
namespace nvbench::detail
{
/**
 * Random-access iterator over a ring buffer's storage.
 *
 * The iterator keeps a logical index that is never wrapped. The index is only
 * reduced modulo the capacity when the iterator is dereferenced, so distances
 * and ordering comparisons are plain index arithmetic.
 *
 * Equality compares both the storage pointer and the index; the ordering
 * operators and the iterator difference compare indices only and are meant
 * for iterators into the same buffer.
 */
template <class T>
class ring_buffer_iterator
{
  std::ptrdiff_t m_index;    // Logical (unwrapped) position.
  std::ptrdiff_t m_capacity; // Number of slots in the underlying storage.
  T *m_ptr;                  // First slot of the underlying storage.

public:
  using iterator_category = std::random_access_iterator_tag;
  using value_type        = T;
  using difference_type   = std::ptrdiff_t;
  using pointer           = T *;
  using reference         = T &;

  /**
   * @param index Logical position of the iterator.
   * @param capacity Number of slots in the storage pointed to by `ptr`.
   * @param ptr Pointer to the first slot of the storage.
   */
  ring_buffer_iterator(std::ptrdiff_t index, std::ptrdiff_t capacity, pointer ptr)
      : m_index{index}
      , m_capacity{capacity}
      , m_ptr{ptr}
  {}

  // Prefix increment returns a reference to this iterator (like the prefix
  // decrement does), so that chained expressions such as `++(++it)` act on the
  // iterator itself rather than on a temporary copy.
  ring_buffer_iterator &operator++()
  {
    ++m_index;
    return *this;
  }

  ring_buffer_iterator operator++(int)
  {
    ring_buffer_iterator temp = *this;
    ++(*this);
    return temp;
  }

  ring_buffer_iterator &operator--()
  {
    --m_index;
    return *this;
  }

  ring_buffer_iterator operator--(int)
  {
    ring_buffer_iterator temp = *this;
    --(*this);
    return temp;
  }

  // Compound assignment, expected from a random-access iterator.
  ring_buffer_iterator &operator+=(difference_type n)
  {
    m_index += n;
    return *this;
  }

  ring_buffer_iterator &operator-=(difference_type n)
  {
    m_index -= n;
    return *this;
  }

  ring_buffer_iterator operator+(difference_type n) const
  {
    return ring_buffer_iterator(m_index + n, m_capacity, m_ptr);
  }

  ring_buffer_iterator operator-(difference_type n) const
  {
    return ring_buffer_iterator(m_index - n, m_capacity, m_ptr);
  }

  difference_type operator-(const ring_buffer_iterator &other) const
  {
    return m_index - other.m_index;
  }

  // The logical index wraps around the storage only here.
  reference operator*() const { return m_ptr[m_index % m_capacity]; }
  pointer operator->() const { return &(operator*()); }

  reference operator[](difference_type n) const { return *(*this + n); }

  bool operator==(const ring_buffer_iterator &other) const
  {
    return m_ptr == other.m_ptr && m_index == other.m_index;
  }
  bool operator!=(const ring_buffer_iterator &other) const { return !(*this == other); }
  bool operator<(const ring_buffer_iterator &other) const { return m_index < other.m_index; }
  bool operator>(const ring_buffer_iterator &other) const { return m_index > other.m_index; }
  bool operator<=(const ring_buffer_iterator &other) const { return !(*this > other); }
  bool operator>=(const ring_buffer_iterator &other) const { return !(*this < other); }
};
/**
* @brief A simple, dynamically sized ring buffer.
*/
@@ -42,7 +121,13 @@ private:
std::size_t m_index{0};
bool m_full{false};
// Storage index at which iteration starts: `m_index` when the buffer is marked
// full, slot 0 otherwise.
std::size_t get_front_index() const
{
  if (m_full)
  {
    return m_index;
  }
  return 0;
}
public:
/**
* Create a new ring buffer with the requested capacity.
*/
@@ -51,17 +136,48 @@ public:
{}
/**
* Iterators provide all values in the ring buffer in unspecified order.
* Iterators provide all values in the ring buffer in FIFO order.
* @{
*/
// clang-format off
[[nodiscard]] auto begin() { return m_buffer.begin(); }
[[nodiscard]] auto begin() const { return m_buffer.begin(); }
[[nodiscard]] auto cbegin() const { return m_buffer.cbegin(); }
[[nodiscard]] auto end() { return m_buffer.begin() + static_cast<diff_t>(this->size()); }
[[nodiscard]] auto end() const { return m_buffer.begin() + static_cast<diff_t>(this->size()); }
[[nodiscard]] auto cend() const { return m_buffer.cbegin() + static_cast<diff_t>(this->size()); }
// clang-format on
// Each accessor builds a ring_buffer_iterator from a logical index, the
// buffer's capacity and a pointer to the underlying storage. `begin` variants
// start at `get_front_index()`; `end` variants use `get_front_index() + size()`
// without wrapping, because the iterator applies the modulo on dereference.

// Mutable iterators.
[[nodiscard]] ring_buffer_iterator<T> begin()
{
return {static_cast<std::ptrdiff_t>(get_front_index()),
static_cast<std::ptrdiff_t>(capacity()),
m_buffer.data()};
}
[[nodiscard]] ring_buffer_iterator<T> end()
{
return {static_cast<std::ptrdiff_t>(get_front_index() + size()),
static_cast<std::ptrdiff_t>(capacity()),
m_buffer.data()};
}
// Read-only iterators for const buffers.
[[nodiscard]] ring_buffer_iterator<const T> begin() const
{
return {static_cast<std::ptrdiff_t>(get_front_index()),
static_cast<std::ptrdiff_t>(capacity()),
m_buffer.data()};
}
[[nodiscard]] ring_buffer_iterator<const T> end() const
{
return {static_cast<std::ptrdiff_t>(get_front_index() + size()),
static_cast<std::ptrdiff_t>(capacity()),
m_buffer.data()};
}
// Explicitly read-only iterators.
[[nodiscard]] ring_buffer_iterator<const T> cbegin() const
{
return {static_cast<std::ptrdiff_t>(get_front_index()),
static_cast<std::ptrdiff_t>(capacity()),
m_buffer.data()};
}
[[nodiscard]] ring_buffer_iterator<const T> cend() const
{
return {static_cast<std::ptrdiff_t>(get_front_index() + size()),
static_cast<std::ptrdiff_t>(capacity()),
m_buffer.data()};
}
/** @} */
/**

View File

@@ -18,14 +18,15 @@
#pragma once
#include <nvbench/types.cuh>
#include <nvbench/detail/transform_reduce.cuh>
#include <nvbench/types.cuh>
#include <cmath>
#include <functional>
#include <iterator>
#include <limits>
#include <numeric>
#include <type_traits>
namespace nvbench::detail::statistics
@@ -41,7 +42,8 @@ ValueType standard_deviation(Iter first, Iter last, ValueType mean)
{
static_assert(std::is_floating_point_v<ValueType>);
const auto num = last - first;
const auto num = std::distance(first, last);
if (num < 5) // don't bother with low sample sizes.
{
return std::numeric_limits<ValueType>::infinity();
@@ -56,8 +58,135 @@ ValueType standard_deviation(Iter first, Iter last, ValueType mean)
val *= val;
return val;
}) /
static_cast<ValueType>((num - 1));
static_cast<ValueType>((num - 1)); // Bessels correction
return std::sqrt(variance);
}
/**
 * Computes and returns the arithmetic mean of [first, last).
 *
 * If the range is empty, infinity is returned.
 */
template <class It>
nvbench::float64_t compute_mean(It first, It last)
{
  const auto count = std::distance(first, last);
  if (count >= 1)
  {
    const auto sum = std::accumulate(first, last, 0.0);
    return sum / static_cast<nvbench::float64_t>(count);
  }

  return std::numeric_limits<nvbench::float64_t>::infinity();
}
/**
* Computes linear regression and returns the slope and intercept
*
* This version takes precomputed mean of [first, last).
* If the input has fewer than 2 samples, infinity is returned for both slope and intercept.
*/
template <class It>
std::pair<nvbench::float64_t, nvbench::float64_t>
compute_linear_regression(It first, It last, nvbench::float64_t mean_y)
{
const std::size_t n = static_cast<std::size_t>(std::distance(first, last));
if (n < 2)
{
return std::make_pair(std::numeric_limits<nvbench::float64_t>::infinity(),
std::numeric_limits<nvbench::float64_t>::infinity());
}
// Assuming x starts from 0
const nvbench::float64_t mean_x = (static_cast<nvbench::float64_t>(n) - 1.0) / 2.0;
// Calculate the numerator and denominator for the slope
nvbench::float64_t numerator = 0.0;
nvbench::float64_t denominator = 0.0;
for (std::size_t i = 0; i < n; ++i, ++first)
{
const nvbench::float64_t x_diff = static_cast<nvbench::float64_t>(i) - mean_x;
numerator += x_diff * (*first - mean_y);
denominator += x_diff * x_diff;
}
// Calculate the slope and intercept
const nvbench::float64_t slope = numerator / denominator;
const nvbench::float64_t intercept = mean_y - slope * mean_x;
return std::make_pair(slope, intercept);
}
/**
* Computes linear regression and returns the slope and intercept
*
* If the input has fewer than 2 samples, infinity is returned for both slope and intercept.
*/
template <class It>
std::pair<nvbench::float64_t, nvbench::float64_t> compute_linear_regression(It first, It last)
{
return compute_linear_regression(first, last, compute_mean(first, last));
}
/**
 * Computes and returns the R^2 (coefficient of determination) of the line
 * `y = slope * x + intercept` against the values of [first, last), where x is
 * the position of each value in the range.
 *
 * `mean_y` is the precomputed mean of [first, last).
 * Returns 1 when the total sum of squares is zero.
 */
template <class It>
nvbench::float64_t compute_r2(It first,
                              It last,
                              nvbench::float64_t mean_y,
                              nvbench::float64_t slope,
                              nvbench::float64_t intercept)
{
  const auto count = static_cast<std::size_t>(std::distance(first, last));

  nvbench::float64_t total_sum_sq    = 0.0; // sum of (y - mean_y)^2
  nvbench::float64_t residual_sum_sq = 0.0; // sum of (y - predicted)^2

  std::size_t position = 0;
  for (It current = first; position < count; ++current, ++position)
  {
    const nvbench::float64_t y         = *current;
    const nvbench::float64_t predicted = slope * static_cast<nvbench::float64_t>(position) + intercept;

    total_sum_sq += (y - mean_y) * (y - mean_y);
    residual_sum_sq += (y - predicted) * (y - predicted);
  }

  // Avoid dividing by zero when all values equal the mean.
  return (total_sum_sq == 0.0) ? 1.0 : 1.0 - residual_sum_sq / total_sum_sq;
}
/**
 * Computes and returns the R^2 (coefficient of determination)
 *
 * Convenience overload that computes the mean of [first, last) itself.
 */
template <class It>
nvbench::float64_t
compute_r2(It first, It last, nvbench::float64_t slope, nvbench::float64_t intercept)
{
  const nvbench::float64_t mean_y = compute_mean(first, last);
  return compute_r2(first, last, mean_y, slope, intercept);
}
/**
 * Converts an angle from radians to degrees.
 */
inline nvbench::float64_t rad2deg(nvbench::float64_t rad)
{
  // M_PI is not part of standard C++ (MSVC only defines it when
  // _USE_MATH_DEFINES is set before <cmath>), so spell the constant out.
  constexpr nvbench::float64_t pi = 3.14159265358979323846;
  return rad * 180.0 / pi;
}
/**
 * Converts a line slope to the angle (in radians) of that line.
 */
inline nvbench::float64_t slope2rad(nvbench::float64_t slope)
{
  // Angle of the direction vector with run 1 and rise `slope`.
  const nvbench::float64_t run  = 1.0;
  const nvbench::float64_t rise = slope;
  return std::atan2(rise, run);
}
/**
 * Converts a line slope to the angle (in degrees) of that line.
 */
inline nvbench::float64_t slope2deg(nvbench::float64_t slope)
{
  const nvbench::float64_t radians = slope2rad(slope);
  return rad2deg(radians);
}
} // namespace nvbench::detail::statistics

View File

@@ -0,0 +1,47 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <nvbench/types.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/ring_buffer.cuh>
#include <vector>
namespace nvbench::detail
{
class stdrel_criterion final : public stopping_criterion_base
{
// state
nvbench::int64_t m_total_samples{};
nvbench::float64_t m_total_cuda_time{};
std::vector<nvbench::float64_t> m_cuda_times{};
nvbench::detail::ring_buffer<nvbench::float64_t> m_noise_tracker{512};
public:
stdrel_criterion();
protected:
virtual void do_initialize() override;
virtual void do_add_measurement(nvbench::float64_t measurement) override;
virtual bool do_is_finished() override;
};
} // namespace nvbench::detail

View File

@@ -0,0 +1,98 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/detail/stdrel_criterion.cuh>
namespace nvbench::detail
{
// Registers the criterion as "stdrel" with the compat defaults for its two
// parameters, `max-noise` and `min-time`.
stdrel_criterion::stdrel_criterion()
    : stopping_criterion_base{
        "stdrel",
        nvbench::criterion_params{{"max-noise", nvbench::detail::compat_max_noise()},
                                  {"min-time", nvbench::detail::compat_min_time()}}}
{}
// Resets all accumulated state so the criterion can be reused for a new run.
void stdrel_criterion::do_initialize()
{
  // Drop the recorded histories...
  m_noise_tracker.clear();
  m_cuda_times.clear();

  // ...and zero the running totals.
  m_total_cuda_time = 0.0;
  m_total_samples   = 0;
}
// Records one measurement and, when it is finite, the resulting relative
// standard deviation of all measurements seen so far.
void stdrel_criterion::do_add_measurement(nvbench::float64_t measurement)
{
  namespace stats = nvbench::detail::statistics;

  // Fold the new sample into the running totals and the full history.
  ++m_total_samples;
  m_total_cuda_time += measurement;
  m_cuda_times.push_back(measurement);

  // Compute convergence statistics using CUDA timings:
  const auto mean  = m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples);
  const auto stdev = stats::standard_deviation(m_cuda_times.cbegin(), m_cuda_times.cend(), mean);
  const auto noise = stdev / mean;

  // Non-finite noise values are not tracked.
  if (!std::isfinite(noise))
  {
    return;
  }
  m_noise_tracker.push_back(noise);
}
// Returns true once enough time has been accumulated and the noise is either
// below the `max-noise` threshold or has stabilized above it.
bool stdrel_criterion::do_is_finished()
{
  // Keep sampling until more than `min-time` of measurements has accumulated.
  if (m_total_cuda_time <= m_params.get_float64("min-time"))
  {
    return false;
  }

  // No finite noise value has been recorded yet (do_add_measurement only
  // tracks finite values), so there is nothing to compare against the
  // threshold and calling back() on the empty buffer would be invalid.
  if (m_noise_tracker.size() == 0)
  {
    return false;
  }

  // Noise has dropped below threshold
  if (m_noise_tracker.back() < m_params.get_float64("max-noise"))
  {
    return true;
  }

  // Check if the noise (cuda rel stdev) has converged by inspecting a
  // trailing window of recorded noise measurements.
  // This helps identify benchmarks that are inherently noisy and would
  // never converge to the target stdev threshold. This check ensures that the
  // benchmark will end if the stdev stabilizes above the target threshold.
  // Gather some iterations before checking noise, and limit how often we
  // check this.
  if (m_noise_tracker.size() > 64 && (m_total_samples % 16 == 0))
  {
    // Use the current noise as the stdev reference.
    const auto current_noise = m_noise_tracker.back();
    const auto noise_stdev =
      nvbench::detail::statistics::standard_deviation(m_noise_tracker.cbegin(),
                                                      m_noise_tracker.cend(),
                                                      current_noise);
    const auto noise_rel_stdev = noise_stdev / current_noise;

    // If the rel stdev of the last N cuda noise measurements is less than
    // 5%, consider the result stable.
    const auto noise_threshold = 0.05;
    if (noise_rel_stdev < noise_threshold)
    {
      return true;
    }
  }

  return false;
}
} // namespace nvbench::detail

View File

@@ -24,6 +24,7 @@
#include <nvbench/callable.cuh>
#include <nvbench/config.cuh>
#include <nvbench/cpu_timer.cuh>
#include <nvbench/criterion_manager.cuh>
#include <nvbench/create.cuh>
#include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_stream.cuh>

View File

@@ -21,6 +21,7 @@
#include <nvbench/benchmark_base.cuh>
#include <nvbench/benchmark_manager.cuh>
#include <nvbench/csv_printer.cuh>
#include <nvbench/criterion_manager.cuh>
#include <nvbench/git_revision.cuh>
#include <nvbench/json_printer.cuh>
#include <nvbench/markdown_printer.cuh>
@@ -376,6 +377,9 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
}
};
const nvbench::criterion_manager::params_description criterion_params =
nvbench::criterion_manager::get().get_params_description();
while (first < last)
{
const auto &arg = *first;
@@ -433,6 +437,12 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
this->enable_run_once();
first += 1;
}
else if (arg == "--stopping-criterion")
{
check_params(1);
this->set_stopping_criterion(first[1]);
first += 2;
}
else if (arg == "--disable-blocking-kernel")
{
this->disable_blocking_kernel();
@@ -504,16 +514,34 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
this->update_int64_prop(first[0], first[1]);
first += 2;
}
else if (arg == "--min-time" || arg == "--max-noise" || arg == "--skip-time" ||
arg == "--timeout")
else if (arg == "--skip-time" || arg == "--timeout")
{
check_params(1);
this->update_float64_prop(first[0], first[1]);
first += 2;
}
else
{
NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg);
{ // Try criterion params
if (arg.size() < 3 || arg[0] != '-' || arg[1] != '-')
{
NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg);
}
std::string_view name(arg.c_str() + 2, arg.size() - 2);
auto it = std::find_if(criterion_params.begin(),
criterion_params.end(),
[&name](const auto &param) { return param.first == name; });
if (it != criterion_params.end())
{
check_params(1);
this->update_criterion_prop(first[0], first[1], it->second);
first += 2;
}
else
{
NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg);
}
}
}
}
@@ -698,6 +726,20 @@ void option_parser::enable_run_once()
bench.set_run_once(true);
}
void option_parser::set_stopping_criterion(const std::string &criterion)
{
// If no active benchmark, save args as global.
if (m_benchmarks.empty())
{
m_global_benchmark_args.push_back("--stopping-criterion");
m_global_benchmark_args.push_back(criterion);
return;
}
benchmark_base &bench = *m_benchmarks.back();
bench.set_stopping_criterion(criterion);
}
void option_parser::disable_blocking_kernel()
{
// If no active benchmark, save args as global.
@@ -933,6 +975,58 @@ catch (std::exception &e)
e.what());
}
// Parses `prop_val` according to `type` and stores it as the criterion
// parameter named by `prop_arg` (without its leading "--") on the most recently
// declared benchmark. With no active benchmark the pair is saved as a global
// argument instead. Any failure is rethrown with the offending option attached.
void option_parser::update_criterion_prop(const std::string &prop_arg,
                                          const std::string &prop_val,
                                          const nvbench::named_values::type type)
try
{
  // If no active benchmark, save args as global.
  if (m_benchmarks.empty())
  {
    m_global_benchmark_args.push_back(prop_arg);
    m_global_benchmark_args.push_back(prop_val);
    return;
  }

  benchmark_base &bench             = *m_benchmarks.back();
  nvbench::criterion_params &params = bench.get_criterion_params();

  // Strip the leading "--" to obtain the parameter name.
  const std::string name(prop_arg.begin() + 2, prop_arg.end());

  switch (type)
  {
    case nvbench::named_values::type::float64: {
      nvbench::float64_t parsed{};
      ::parse(prop_val, parsed);
      if (prop_arg == "--max-noise")
      { // Specified as percentage, stored as ratio:
        parsed /= 100.0;
      }
      params.set_float64(name, parsed);
      break;
    }
    case nvbench::named_values::type::int64: {
      nvbench::int64_t parsed{};
      ::parse(prop_val, parsed);
      params.set_int64(name, parsed);
      break;
    }
    case nvbench::named_values::type::string: {
      params.set_string(name, prop_val);
      break;
    }
    default: {
      NVBENCH_THROW(std::runtime_error, "Unrecognized property: `{}`", prop_arg);
    }
  }
}
catch (const std::exception &e)
{
  NVBENCH_THROW(std::runtime_error,
                "Error handling option `{} {}`:\n{}",
                prop_arg,
                prop_val,
                e.what());
}
void option_parser::update_float64_prop(const std::string &prop_arg, const std::string &prop_val)
try
{
@@ -948,15 +1042,7 @@ try
nvbench::float64_t value{};
::parse(prop_val, value);
if (prop_arg == "--min-time")
{
bench.set_min_time(value);
}
else if (prop_arg == "--max-noise")
{ // Specified as percentage, stored as ratio:
bench.set_max_noise(value / 100.);
}
else if (prop_arg == "--skip-time")
if (prop_arg == "--skip-time")
{
bench.set_skip_time(value);
}

View File

@@ -20,6 +20,7 @@
#include <nvbench/device_info.cuh>
#include <nvbench/printer_multiplex.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <iosfwd>
#include <memory>
@@ -86,6 +87,7 @@ private:
void set_persistence_mode(const std::string &state);
void lock_gpu_clocks(const std::string &rate);
void set_stopping_criterion(const std::string &criterion);
void enable_run_once();
void disable_blocking_kernel();
@@ -111,6 +113,10 @@ private:
void update_int64_prop(const std::string &prop_arg, const std::string &prop_val);
void update_float64_prop(const std::string &prop_arg, const std::string &prop_val);
void update_criterion_prop(const std::string &prop_arg,
const std::string &prop_val,
const nvbench::named_values::type type);
void update_used_device_state() const;
// Command line args

View File

@@ -24,6 +24,7 @@
#include <nvbench/named_values.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/types.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <functional>
#include <optional>
@@ -122,6 +123,17 @@ struct state
void set_min_samples(nvbench::int64_t min_samples) { m_min_samples = min_samples; }
/// @}
/// Parameters of the stopping criterion stored in this state.
[[nodiscard]] const nvbench::criterion_params &get_criterion_params() const
{
  return this->m_criterion_params;
}
/// Control the stopping criterion for the measurement loop.
/// @{
[[nodiscard]] const std::string& get_stopping_criterion() const { return m_stopping_criterion; }
void set_stopping_criterion(std::string criterion) { m_stopping_criterion = std::move(criterion); }
/// @}
/// If true, the benchmark is only run once, skipping all warmup runs and only
/// executing a single non-batched measurement. This is intended for use with
/// external profiling tools. @{
@@ -135,16 +147,30 @@ struct state
void set_disable_blocking_kernel(bool v) { m_disable_blocking_kernel = v; }
/// @}
/// Accumulate at least this many seconds of timing data per measurement. @{
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
void set_min_time(nvbench::float64_t min_time) { m_min_time = min_time; }
/// Accumulate at least this many seconds of timing data per measurement.
/// Only applies to `stdrel` stopping criterion. @{
[[nodiscard]] nvbench::float64_t get_min_time() const
{
  // Stored as the `min-time` criterion parameter.
  const nvbench::float64_t seconds = m_criterion_params.get_float64("min-time");
  return seconds;
}
void set_min_time(nvbench::float64_t min_time)
{
  // Stored as the `min-time` criterion parameter.
  this->m_criterion_params.set_float64("min-time", min_time);
}
/// @}
/// Specify the maximum amount of noise if a measurement supports noise.
/// Noise is the relative standard deviation:
/// `noise = stdev / mean_time`. @{
[[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; }
void set_max_noise(nvbench::float64_t max_noise) { m_max_noise = max_noise; }
/// `noise = stdev / mean_time`.
/// Only applies to `stdrel` stopping criterion. @{
[[nodiscard]] nvbench::float64_t get_max_noise() const
{
  // Stored as the `max-noise` criterion parameter.
  const nvbench::float64_t ratio = m_criterion_params.get_float64("max-noise");
  return ratio;
}
void set_max_noise(nvbench::float64_t max_noise)
{
  // Stored as the `max-noise` criterion parameter.
  this->m_criterion_params.set_float64("max-noise", max_noise);
}
/// @}
/// If a warmup run finishes in less than `skip_time`, the measurement will
@@ -270,9 +296,11 @@ private:
bool m_run_once{false};
bool m_disable_blocking_kernel{false};
nvbench::criterion_params m_criterion_params;
std::string m_stopping_criterion;
nvbench::int64_t m_min_samples;
nvbench::float64_t m_min_time;
nvbench::float64_t m_max_noise;
nvbench::float64_t m_skip_time;
nvbench::float64_t m_timeout;

View File

@@ -36,9 +36,9 @@ state::state(const benchmark_base &bench)
: m_benchmark{bench}
, m_run_once{bench.get_run_once()}
, m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
, m_criterion_params{bench.get_criterion_params()}
, m_stopping_criterion(bench.get_stopping_criterion())
, m_min_samples{bench.get_min_samples()}
, m_min_time{bench.get_min_time()}
, m_max_noise{bench.get_max_noise()}
, m_skip_time{bench.get_skip_time()}
, m_timeout{bench.get_timeout()}
{}
@@ -53,9 +53,9 @@ state::state(const benchmark_base &bench,
, m_type_config_index{type_config_index}
, m_run_once{bench.get_run_once()}
, m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
, m_criterion_params{bench.get_criterion_params()}
, m_stopping_criterion(bench.get_stopping_criterion())
, m_min_samples{bench.get_min_samples()}
, m_min_time{bench.get_min_time()}
, m_max_noise{bench.get_max_noise()}
, m_skip_time{bench.get_skip_time()}
, m_timeout{bench.get_timeout()}
{}

View File

@@ -0,0 +1,136 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <nvbench/named_values.cuh>
#include <nvbench/types.cuh>
#include <string>
#include <initializer_list>
#include <unordered_map>
namespace nvbench
{
namespace detail
{

/// Default value of the `min-time` parameter: 0.5 seconds.
constexpr nvbench::float64_t compat_min_time()
{
  return 0.5;
}

/// Default value of the `max-noise` parameter: 0.5% relative standard
/// deviation, expressed as a ratio.
constexpr nvbench::float64_t compat_max_noise()
{
  return 0.005;
}

} // namespace detail
/**
 * Holds the named parameters of the stopping criterion in use.
 */
class criterion_params
{
public:
  /// Creates the parameter set pre-populated with the compat `max-noise` and
  /// `min-time` defaults.
  criterion_params();

  /// Creates a parameter set holding exactly the given (name, value) pairs.
  criterion_params(std::initializer_list<std::pair<std::string, nvbench::named_values::value_type>>);

  /**
   * Set parameter values from another criterion_params object if they exist
   *
   * Parameters in `other` that do not correspond to parameters in `this` are ignored.
   */
  void set_from(const criterion_params &other);

  /// Setters: store `value` under `name`, replacing any existing entry. @{
  void set_int64(std::string name, nvbench::int64_t value);
  void set_float64(std::string name, nvbench::float64_t value);
  void set_string(std::string name, std::string value);
  /// @}

  /// Introspection of the stored parameters. @{
  [[nodiscard]] std::vector<std::string> get_names() const;
  [[nodiscard]] nvbench::named_values::type get_type(const std::string &name) const;
  [[nodiscard]] bool has_value(const std::string &name) const;
  /// @}

  /// Typed getters for the parameter stored under `name`. @{
  [[nodiscard]] nvbench::int64_t get_int64(const std::string &name) const;
  [[nodiscard]] nvbench::float64_t get_float64(const std::string &name) const;
  [[nodiscard]] std::string get_string(const std::string &name) const;
  /// @}

private:
  nvbench::named_values m_named_values;
};
/**
 * Stopping criterion interface
 *
 * Concrete criteria derive from this class and implement the `do_*` hooks;
 * callers use the public non-virtual `initialize` / `add_measurement` /
 * `is_finished` entry points.
 */
class stopping_criterion_base
{
protected:
  std::string m_name;
  criterion_params m_params;

public:
  /**
   * @param name Unique name of the criterion
   * @param params Default values for all parameters of the criterion
   */
  explicit stopping_criterion_base(std::string name, criterion_params params)
      : m_name{std::move(name)}
      , m_params{std::move(params)}
  {}

  /**
   * Virtual so that a derived criterion can be destroyed through a pointer to
   * this base class (e.g. when owned by a `std::unique_ptr` to the base).
   */
  virtual ~stopping_criterion_base() = default;

  // Declaring the destructor would otherwise suppress the implicit move
  // operations; default all of them to keep the original value semantics.
  stopping_criterion_base(const stopping_criterion_base &)            = default;
  stopping_criterion_base(stopping_criterion_base &&)                 = default;
  stopping_criterion_base &operator=(const stopping_criterion_base &) = default;
  stopping_criterion_base &operator=(stopping_criterion_base &&)      = default;

  [[nodiscard]] const std::string &get_name() const { return m_name; }
  [[nodiscard]] const criterion_params &get_params() const { return m_params; }

  /**
   * Initialize the criterion with the given parameters
   *
   * This method is called once per benchmark run, before any measurements are provided.
   * Only parameters that this criterion already declares are taken from `params`.
   */
  void initialize(const criterion_params &params)
  {
    m_params.set_from(params);
    this->do_initialize();
  }

  /**
   * Add the latest measurement to the criterion
   */
  void add_measurement(nvbench::float64_t measurement)
  {
    this->do_add_measurement(measurement);
  }

  /**
   * Check if the criterion has been met for all measurements processed by `add_measurement`
   */
  bool is_finished()
  {
    return this->do_is_finished();
  }

protected:
  /**
   * Initialize the criterion after updating the parameters
   */
  virtual void do_initialize() = 0;

  /**
   * Add the latest measurement to the criterion
   */
  virtual void do_add_measurement(nvbench::float64_t measurement) = 0;

  /**
   * Check if the criterion has been met for all measurements processed by `add_measurement`
   */
  virtual bool do_is_finished() = 0;
};
} // namespace nvbench

View File

@@ -0,0 +1,124 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/throw.cuh>
namespace nvbench
{
// Default constructor for compatibility with old code.
// Delegates to the initializer-list constructor so that a default-constructed
// object always carries the `max-noise` and `min-time` parameters, set to their
// compat defaults.
criterion_params::criterion_params()
: criterion_params{{"max-noise", nvbench::detail::compat_max_noise()},
{"min-time", nvbench::detail::compat_min_time()}}
{}
// Stores every (name, value) pair of `list` as a named value.
criterion_params::criterion_params(
  std::initializer_list<std::pair<std::string, nvbench::named_values::value_type>> list)
{
  for (auto entry = list.begin(); entry != list.end(); ++entry)
  {
    m_named_values.set_value(entry->first, entry->second);
  }
}
// Overwrites each parameter of `this` with the same-named parameter of `other`
// when `other` has one. Names that exist only in `other` are ignored; a name
// present in both with different types raises std::runtime_error.
void criterion_params::set_from(const criterion_params &other)
{
  const std::vector<std::string> own_names = this->get_names();
  for (const std::string &name : own_names)
  {
    if (!other.has_value(name))
    {
      continue;
    }

    const auto expected = this->get_type(name);
    const auto provided = other.get_type(name);
    if (expected != provided)
    {
      NVBENCH_THROW(std::runtime_error,
                    "Mismatched types for named value \"{}\". "
                    "Expected {}, got {}.",
                    name,
                    static_cast<int>(expected),
                    static_cast<int>(provided));
    }

    // Replace the current entry with the one from `other`.
    m_named_values.remove_value(name);
    m_named_values.set_value(name, other.m_named_values.get_value(name));
  }
}
// Stores `value` as the int64 parameter `name`, replacing any existing entry
// with that name.
void criterion_params::set_int64(std::string name, nvbench::int64_t value)
{
  // Remove an existing entry first so the new value replaces it.
  if (m_named_values.has_value(name))
  {
    m_named_values.remove_value(name);
  }
  // `name` is a by-value parameter on its last use: move it instead of copying.
  m_named_values.set_int64(std::move(name), value);
}
// Stores `value` as the float64 parameter `name`, replacing any existing entry
// with that name.
void criterion_params::set_float64(std::string name, nvbench::float64_t value)
{
  // Remove an existing entry first so the new value replaces it.
  if (m_named_values.has_value(name))
  {
    m_named_values.remove_value(name);
  }
  // `name` is a by-value parameter on its last use: move it instead of copying.
  m_named_values.set_float64(std::move(name), value);
}
// Stores `value` as the string parameter `name`, replacing any existing entry
// with that name.
void criterion_params::set_string(std::string name, std::string value)
{
  // Remove an existing entry first so the new value replaces it.
  if (m_named_values.has_value(name))
  {
    m_named_values.remove_value(name);
  }
  // Both arguments are by-value parameters on their last use: move them.
  m_named_values.set_string(std::move(name), std::move(value));
}
// Reports whether a parameter called `name` is stored.
bool criterion_params::has_value(const std::string &name) const
{
  const bool present = m_named_values.has_value(name);
  return present;
}
// Returns the int64 parameter stored under `name`.
nvbench::int64_t criterion_params::get_int64(const std::string &name) const
{
  const nvbench::int64_t stored = m_named_values.get_int64(name);
  return stored;
}
// Returns the float64 parameter stored under `name`.
nvbench::float64_t criterion_params::get_float64(const std::string &name) const
{
  const nvbench::float64_t stored = m_named_values.get_float64(name);
  return stored;
}
std::string criterion_params::get_string(const std::string &name) const
{
return m_named_values.get_string(name);
}
std::vector<std::string> criterion_params::get_names() const
{
return m_named_values.get_names();
}
// Returns the type tag of the parameter stored under `name`.
nvbench::named_values::type criterion_params::get_type(const std::string &name) const
{
  const nvbench::named_values::type kind = m_named_values.get_type(name);
  return kind;
}
} // namespace nvbench

View File

@@ -4,7 +4,10 @@ set(test_srcs
create.cu
cuda_timer.cu
cpu_timer.cu
criterion_manager.cu
criterion_params.cu
enum_type_list.cu
entropy_criterion.cu
float64_axis.cu
int64_axis.cu
named_values.cu
@@ -13,7 +16,9 @@ set(test_srcs
ring_buffer.cu
runner.cu
state.cu
statistics.cu
state_generator.cu
stdrel_criterion.cu
string_axis.cu
type_axis.cu
type_list.cu

View File

@@ -0,0 +1,76 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/criterion_manager.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
void test_standard_criteria_exist()
{
ASSERT(nvbench::criterion_manager::get().get_criterion("stdrel").get_name() == "stdrel");
ASSERT(nvbench::criterion_manager::get().get_criterion("entropy").get_name() == "entropy");
}
// Minimal criterion named "custom" used to exercise registration: it keeps no
// state and always reports that it is finished.
class custom_criterion : public nvbench::stopping_criterion_base
{
public:
  custom_criterion()
      : nvbench::stopping_criterion_base("custom", nvbench::criterion_params{})
  {}

protected:
  void do_initialize() override {}
  void do_add_measurement(nvbench::float64_t) override {}
  bool do_is_finished() override { return true; }
};
void test_no_duplicates_are_allowed()
{
nvbench::criterion_manager& manager = nvbench::criterion_manager::get();
bool exception_triggered = false;
try {
nvbench::stopping_criterion_base& custom = manager.get_criterion("custom");
} catch(...) {
exception_triggered = true;
}
ASSERT(exception_triggered);
std::unique_ptr<custom_criterion> custom_ptr = std::make_unique<custom_criterion>();
custom_criterion* custom_raw = custom_ptr.get();
ASSERT(&manager.add(std::move(custom_ptr)) == custom_raw);
nvbench::stopping_criterion_base& custom = nvbench::criterion_manager::get().get_criterion("custom");
ASSERT(custom_raw == &custom);
exception_triggered = false;
try {
manager.add(std::make_unique<custom_criterion>());
} catch(...) {
exception_triggered = true;
}
ASSERT(exception_triggered);
}
// Runs the criterion_manager tests. The lookup test runs first, before the
// "custom" criterion gets registered by the duplicate test.
int main()
{
  test_standard_criteria_exist();
  test_no_duplicates_are_allowed();
  return 0;
}

View File

@@ -0,0 +1,63 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/criterion_manager.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
void test_compat_parameters()
{
nvbench::criterion_params params;
ASSERT(params.has_value("max-noise"));
ASSERT(params.has_value("min-time"));
ASSERT(params.get_float64("max-noise") == nvbench::detail::compat_max_noise());
ASSERT(params.get_float64("min-time") == nvbench::detail::compat_min_time());
}
void test_compat_overwrite()
{
nvbench::criterion_params params;
params.set_float64("max-noise", 40000.0);
params.set_float64("min-time", 42000.0);
ASSERT(params.get_float64("max-noise") == 40000.0);
ASSERT(params.get_float64("min-time") == 42000.0);
}
void test_overwrite()
{
nvbench::criterion_params params;
ASSERT(!params.has_value("custom"));
params.set_float64("custom", 42.0);
ASSERT(params.get_float64("custom") == 42.0);
params.set_float64("custom", 4.2);
ASSERT(params.get_float64("custom") == 4.2);
}
// Runs the criterion_params tests.
int main()
{
  test_compat_parameters();
  test_compat_overwrite();
  test_overwrite();
  return 0;
}

View File

@@ -0,0 +1,91 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/detail/entropy_criterion.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <vector>
#include <random>
#include <numeric>
// A stream of identical measurements must satisfy the entropy criterion.
void test_const()
{
  nvbench::criterion_params params;
  nvbench::detail::entropy_criterion criterion;
  criterion.initialize(params);

  // nvbench wants at least 5 to compute the standard deviation
  int remaining = 6;
  while (remaining > 0)
  {
    criterion.add_measurement(42.0);
    --remaining;
  }

  ASSERT(criterion.is_finished());
}
// Feeds 50 measurements into `criterion`: 0, 1, 2, 3, 4, 5 followed by a
// constant 5 for every remaining sample.
void produce_entropy_arch(nvbench::detail::entropy_criterion &criterion)
{
  /*
   * This pattern is designed to simulate the entropy:
   *
   *   0.0, 1.0, 1.5, 2.0, 2.3, 2.5 <---- no unexpected measurement after this point
   *   2.5, 2.4, 2.2, 2.1, 2.0, 1.9 <-+
   *   1.8, 1.7, 1.6, 1.6, 1.5, 1.4   |
   *   1.4, 1.3, 1.3, 1.3, 1.2, 1.2   |
   *   1.1, 1.1, 1.1, 1.0, 1.0, 1.0   +-- entropy only decreases after 5-th sample,
   *   1.0, 0.9, 0.9, 0.9, 0.9, 0.9   |   so the slope should be negative
   *   0.8, 0.8, 0.8, 0.8, 0.8, 0.8   |
   *   0.7, 0.7, 0.7, 0.7, 0.7, 0.7 <-+
   */
  const nvbench::float64_t plateau = 5.0;
  for (int step = 0; step < 50; ++step)
  {
    // Ramp up until the plateau is reached, then stay there.
    const nvbench::float64_t ramp = static_cast<nvbench::float64_t>(step);
    criterion.add_measurement(ramp <= plateau ? ramp : plateau);
  }
}
void test_entropy_arch()
{
nvbench::detail::entropy_criterion criterion;
// The R2 should be around 0.5
// The angle should be around -1.83
nvbench::criterion_params params;
params.set_float64("min-r2", 0.3);
params.set_float64("max-angle", -1.0);
criterion.initialize(params);
produce_entropy_arch(criterion);
ASSERT(criterion.is_finished());
params.set_float64("min-r2", 0.7);
criterion.initialize(params);
produce_entropy_arch(criterion);
ASSERT(!criterion.is_finished());
params.set_float64("min-r2", 0.3);
params.set_float64("max-angle", -2.0);
criterion.initialize(params);
produce_entropy_arch(criterion);
ASSERT(!criterion.is_finished());
}
// Runs the entropy_criterion tests.
int main()
{
  test_const();
  test_entropy_arch();
  return 0;
}

View File

@@ -1229,6 +1229,27 @@ void test_timeout()
ASSERT(std::abs(states[0].get_timeout() - 12345e2) < 1.);
}
void test_stopping_criterion()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "DummyBench",
"--stopping-criterion", "entropy",
"--max-angle", "0.42",
"--min-r2", "0.6"});
const auto& states = parser_to_states(parser);
ASSERT(states.size() == 1);
ASSERT(states[0].get_stopping_criterion() == "entropy");
const nvbench::criterion_params &criterion_params = states[0].get_criterion_params();
ASSERT(criterion_params.has_value("max-angle"));
ASSERT(criterion_params.has_value("min-r2"));
ASSERT(criterion_params.get_float64("max-angle") == 0.42);
ASSERT(criterion_params.get_float64("min-r2") == 0.6);
}
int main()
try
{
@@ -1265,6 +1286,8 @@ try
test_skip_time();
test_timeout();
test_stopping_criterion();
return 0;
}
catch (std::exception &err)

View File

@@ -27,7 +27,7 @@ template <typename T>
bool equal(const nvbench::detail::ring_buffer<T> &buffer,
const std::vector<T> &reference)
{
return std::equal(buffer.cbegin(), buffer.cend(), reference.cbegin());
return std::equal(buffer.begin(), buffer.end(), reference.begin());
}
int main()
@@ -62,12 +62,12 @@ try
ASSERT(avg.size() == 3);
ASSERT(avg.capacity() == 3);
ASSERT_MSG(avg.back() == 5, " (got {})", avg.back());
ASSERT(equal(avg, {5, 2, -15}));
ASSERT(equal(avg, {2, -15, 5}));
avg.push_back(0);
ASSERT(avg.size() == 3);
ASSERT(avg.capacity() == 3);
ASSERT(equal(avg, {5, 0, -15}));
ASSERT(equal(avg, {-15, 5, 0}));
ASSERT_MSG(avg.back() == 0, " (got {})", avg.back());
avg.push_back(128);

128
testing/statistics.cu Normal file
View File

@@ -0,0 +1,128 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/detail/statistics.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <vector>
namespace statistics = nvbench::detail::statistics;
void test_mean()
{
{
std::vector<nvbench::float64_t> data{1.0, 2.0, 3.0, 4.0, 5.0};
const nvbench::float64_t actual = statistics::compute_mean(std::begin(data), std::end(data));
const nvbench::float64_t expected = 3.0;
ASSERT(std::abs(actual - expected) < 0.001);
}
{
std::vector<nvbench::float64_t> data;
const bool finite = std::isfinite(statistics::compute_mean(std::begin(data), std::end(data)));
ASSERT(!finite);
}
}
void test_std()
{
std::vector<nvbench::float64_t> data{1.0, 2.0, 3.0, 4.0, 5.0};
const nvbench::float64_t mean = 3.0;
const nvbench::float64_t actual = statistics::standard_deviation(std::begin(data), std::end(data), mean);
const nvbench::float64_t expected = 1.581;
ASSERT(std::abs(actual - expected) < 0.001);
}
void test_lin_regression()
{
{
std::vector<nvbench::float64_t> ys{1.0, 2.0, 3.0, 4.0, 5.0};
auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
ASSERT(slope == 1.0);
ASSERT(intercept == 1.0);
}
{
std::vector<nvbench::float64_t> ys{42.0, 42.0, 42.0};
auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
ASSERT(slope == 0.0);
ASSERT(intercept == 42.0);
}
{
std::vector<nvbench::float64_t> ys{8.0, 4.0, 0.0};
auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
ASSERT(slope == -4.0);
ASSERT(intercept == 8.0);
}
}
void test_r2()
{
{
std::vector<nvbench::float64_t> ys{1.0, 2.0, 3.0, 4.0, 5.0};
auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
const nvbench::float64_t actual = statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept);
const nvbench::float64_t expected = 1.0;
ASSERT(std::abs(actual - expected) < 0.001);
}
{
std::vector<nvbench::float64_t> signal{1.0, 2.0, 3.0, 4.0, 5.0};
std::vector<nvbench::float64_t> noise{-1.0, 1.0, -1.0, 1.0, -1.0};
std::vector<nvbench::float64_t> ys(signal.size());
std::transform(std::begin(signal),
std::end(signal),
std::begin(noise),
std::begin(ys),
std::plus<nvbench::float64_t>());
auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
const nvbench::float64_t expected = 0.675;
const nvbench::float64_t actual = statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept);
ASSERT(std::abs(actual - expected) < 0.001);
}
}
void test_slope_conversion()
{
{
const nvbench::float64_t actual = statistics::slope2deg(0.0);
const nvbench::float64_t expected = 0.0;
ASSERT(std::abs(actual - expected) < 0.001);
}
{
const nvbench::float64_t actual = statistics::slope2deg(1.0);
const nvbench::float64_t expected = 45.0;
ASSERT(std::abs(actual - expected) < 0.001);
}
{
const nvbench::float64_t actual = statistics::slope2deg(5.0);
const nvbench::float64_t expected = 78.69;
ASSERT(std::abs(actual - expected) < 0.001);
}
}
// Test driver for the statistics helpers: invokes each test function once,
// in the order they are defined, and returns 0 once all of them have returned.
int main()
{
  test_mean();
  test_std();
  test_lin_regression();
  test_r2();
  test_slope_conversion();
  return 0;
}

View File

@@ -0,0 +1,84 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/detail/stdrel_criterion.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <algorithm>
#include <numeric>
#include <random>
#include <vector>
// Feeds identical measurements into a default-configured stdrel criterion and
// expects it to report that it is finished.
void test_const()
{
  // nvbench wants at least 5 to compute the standard deviation
  constexpr int num_samples = 5;

  nvbench::criterion_params params;
  nvbench::detail::stdrel_criterion criterion;
  criterion.initialize(params);

  int remaining = num_samples;
  while (remaining-- > 0)
  {
    criterion.add_measurement(42.0);
  }

  ASSERT(criterion.is_finished());
}
// Returns `size` pseudo-random samples drawn from a normal distribution with
// the given `mean` and a standard deviation of `mean * rel_std_dev`.
//
// The generator is seeded deterministically so that tests built on top of
// this helper are reproducible from run to run: seeding from
// std::random_device made the outcome of threshold-based assertions depend on
// the luck of the draw. The seed is advanced on every call so that successive
// calls still yield different sequences.
//
// Note: the seed counter is a function-local static and is not synchronized;
// this helper is intended for single-threaded test code.
std::vector<double> generate(double mean, double rel_std_dev, int size)
{
  static std::mt19937::result_type seed = 0;
  std::mt19937 gen(seed++);

  // Element type matches the declared return type.
  std::vector<double> v(size);
  std::normal_distribution<double> dist(mean, mean * rel_std_dev);
  std::generate(v.begin(), v.end(), [&] { return dist(gen); });
  return v;
}
void test_stdrel()
{
const nvbench::int64_t size = 10;
const nvbench::float64_t mean = 42.0;
const nvbench::float64_t max_noise = 0.1;
nvbench::criterion_params params;
params.set_float64("max-noise", max_noise);
nvbench::detail::stdrel_criterion criterion;
criterion.initialize(params);
for (nvbench::float64_t measurement: generate(mean, max_noise / 2, size))
{
criterion.add_measurement(measurement);
}
ASSERT(criterion.is_finished());
params.set_float64("max-noise", max_noise);
criterion.initialize(params);
for (nvbench::float64_t measurement: generate(mean, max_noise * 2, size))
{
criterion.add_measurement(measurement);
}
ASSERT(!criterion.is_finished());
}
// Test driver for the stdrel stopping criterion: runs the constant-input
// check, then the threshold check, and returns 0 once both have returned.
int main()
{
  test_const();
  test_stdrel();
  return 0;
}