mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Merge pull request #151 from gevtushenko/entropy
Entropy-based stopping criterion
This commit is contained in:
63
.clangd
Normal file
63
.clangd
Normal file
@@ -0,0 +1,63 @@
|
||||
# https://clangd.llvm.org/config
|
||||
|
||||
# Apply a config conditionally to all C files
|
||||
If:
|
||||
PathMatch: .*\.(c|h)$
|
||||
|
||||
---
|
||||
|
||||
# Apply a config conditionally to all C++ files
|
||||
If:
|
||||
PathMatch: .*\.(c|h)pp
|
||||
|
||||
---
|
||||
|
||||
# Apply a config conditionally to all CUDA files
|
||||
If:
|
||||
PathMatch: .*\.cuh?
|
||||
CompileFlags:
|
||||
Add:
|
||||
# Allow variadic CUDA functions
|
||||
- "-Xclang=-fcuda-allow-variadic-functions"
|
||||
|
||||
---
|
||||
|
||||
# Tweak the clangd parse settings for all files
|
||||
CompileFlags:
|
||||
Compiler: clang++
|
||||
CompilationDatabase: .
|
||||
Add:
|
||||
- -x
|
||||
- cuda
|
||||
# report all errors
|
||||
- "-ferror-limit=0"
|
||||
- "-ftemplate-backtrace-limit=0"
|
||||
- "-stdlib=libc++"
|
||||
- "-std=c++17"
|
||||
Remove:
|
||||
# strip CUDA fatbin args
|
||||
- "-Xfatbin*"
|
||||
- "-Xcompiler*"
|
||||
- "-Xcudafe*"
|
||||
- "-rdc=*"
|
||||
- "-gpu=*"
|
||||
- "--diag_suppress*"
|
||||
# strip CUDA arch flags
|
||||
- "-gencode*"
|
||||
- "--generate-code*"
|
||||
# strip gcc's -fcoroutines
|
||||
- -fcoroutines
|
||||
# strip CUDA flags unknown to clang
|
||||
- "-ccbin*"
|
||||
- "--compiler-options*"
|
||||
- "--expt-extended-lambda"
|
||||
- "--expt-relaxed-constexpr"
|
||||
- "-forward-unknown-to-host-compiler"
|
||||
- "-Werror=cross-execution-space-call"
|
||||
Diagnostics:
|
||||
Suppress:
|
||||
- "variadic_device_fn"
|
||||
- "attributes_not_allowed"
|
||||
# The NVHPC version of _NVCXX_EXPAND_PACK macro triggers this clang error.
|
||||
# Temporarily suppressing it, but should probably fix
|
||||
- "template_param_shadow"
|
||||
@@ -89,8 +89,15 @@
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--stopping-criterion <criterion>`
|
||||
* After `--min-samples` is satisfied, use `<criterion>` to detect if enough
|
||||
samples were collected.
|
||||
* Only applies to Cold measurements.
|
||||
* Built-in criteria are `stdrel` and `entropy`. Default is `stdrel` (`--stopping-criterion stdrel`).
|
||||
|
||||
* `--min-time <seconds>`
|
||||
* Accumulate at least `<seconds>` of execution time per measurement.
|
||||
* Only applies to `stdrel` stopping criterion.
|
||||
* Default is 0.5 seconds.
|
||||
* If both GPU and CPU times are gathered, this applies to GPU time only.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
@@ -100,6 +107,7 @@
|
||||
* Gather samples until the error in the measurement drops below `<value>`.
|
||||
* Noise is specified as the percent relative standard deviation.
|
||||
* Default is 0.5% (`--max-noise 0.5`)
|
||||
* Only applies to `stdrel` stopping criterion.
|
||||
* Only applies to Cold measurements.
|
||||
* If both GPU and CPU times are gathered, this applies to GPU noise only.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
set(example_srcs
|
||||
auto_throughput.cu
|
||||
axes.cu
|
||||
custom_criterion.cu
|
||||
enums.cu
|
||||
exec_tag_sync.cu
|
||||
exec_tag_timer.cu
|
||||
skip.cu
|
||||
stream.cu
|
||||
throughput.cu
|
||||
auto_throughput.cu
|
||||
)
|
||||
|
||||
# Metatarget for all examples:
|
||||
|
||||
80
examples/custom_criterion.cu
Normal file
80
examples/custom_criterion.cu
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/nvbench.cuh>
|
||||
|
||||
// Grab some testing kernels from NVBench:
|
||||
#include <nvbench/test_kernels.cuh>
|
||||
|
||||
// Thrust vectors simplify memory management:
|
||||
#include <thrust/device_vector.h>
|
||||
|
||||
// Inherit from the stopping_criterion_base class:
|
||||
class fixed_criterion final : public nvbench::stopping_criterion_base
|
||||
{
|
||||
nvbench::int64_t m_num_samples{};
|
||||
|
||||
public:
|
||||
fixed_criterion()
|
||||
: nvbench::stopping_criterion_base{"fixed", {{"max-samples", nvbench::int64_t{42}}}}
|
||||
{}
|
||||
|
||||
protected:
|
||||
// Setup the criterion in the `do_initialize()` method:
|
||||
virtual void do_initialize() override
|
||||
{
|
||||
m_num_samples = 0;
|
||||
}
|
||||
|
||||
// Process new measurements in the `add_measurement()` method:
|
||||
virtual void do_add_measurement(nvbench::float64_t /* measurement */) override
|
||||
{
|
||||
m_num_samples++;
|
||||
}
|
||||
|
||||
// Check if the stopping criterion is met in the `is_finished()` method:
|
||||
virtual bool do_is_finished() override
|
||||
{
|
||||
return m_num_samples >= m_params.get_int64("max-samples");
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// Register the criterion with NVBench:
|
||||
NVBENCH_REGISTER_CRITERION(fixed_criterion);
|
||||
|
||||
// Benchmark body: launches `nvbench::copy_kernel` over two 64 MiB device
// buffers of `nvbench::int32_t` and supplies element / memory traffic counts.
void throughput_bench(nvbench::state &state)
{
  using value_t = nvbench::int32_t;

  // 64 MiB worth of elements for each buffer:
  const std::size_t num_values = 64 * 1024 * 1024 / sizeof(value_t);
  thrust::device_vector<value_t> input(num_values);
  thrust::device_vector<value_t> output(num_values);

  // Describe the amount of work performed per launch:
  state.add_element_count(num_values, "NumElements");
  state.add_global_memory_reads<value_t>(num_values, "DataSize");
  state.add_global_memory_writes<value_t>(num_values);

  // The vectors outlive `exec`, so the launcher can simply capture the raw
  // device pointers by value.
  auto *in_ptr  = thrust::raw_pointer_cast(input.data());
  auto *out_ptr = thrust::raw_pointer_cast(output.data());

  state.exec(nvbench::exec_tag::no_batch, [in_ptr, out_ptr, num_values](nvbench::launch &launch) {
    nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values);
  });
}
|
||||
NVBENCH_BENCH(throughput_bench).set_stopping_criterion("fixed");
|
||||
@@ -4,6 +4,7 @@ set(srcs
|
||||
benchmark_base.cxx
|
||||
benchmark_manager.cxx
|
||||
blocking_kernel.cu
|
||||
criterion_manager.cxx
|
||||
csv_printer.cu
|
||||
cuda_call.cu
|
||||
device_info.cu
|
||||
@@ -17,13 +18,16 @@ set(srcs
|
||||
printer_multiplex.cxx
|
||||
runner.cxx
|
||||
state.cxx
|
||||
stopping_criterion.cxx
|
||||
string_axis.cxx
|
||||
type_axis.cxx
|
||||
type_strings.cxx
|
||||
|
||||
detail/entropy_criterion.cxx
|
||||
detail/measure_cold.cu
|
||||
detail/measure_hot.cu
|
||||
detail/state_generator.cxx
|
||||
detail/stdrel_criterion.cxx
|
||||
|
||||
internal/nvml.cxx
|
||||
)
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/device_manager.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
|
||||
#include <functional> // reference_wrapper, ref
|
||||
#include <memory>
|
||||
@@ -181,22 +182,30 @@ struct benchmark_base
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// Accumulate at least this many seconds of timing data per measurement. @{
|
||||
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
|
||||
/// Accumulate at least this many seconds of timing data per measurement.
|
||||
/// Only applies to `stdrel` stopping criterion. @{
|
||||
[[nodiscard]] nvbench::float64_t get_min_time() const
|
||||
{
|
||||
return m_criterion_params.get_float64("min-time");
|
||||
}
|
||||
benchmark_base &set_min_time(nvbench::float64_t min_time)
|
||||
{
|
||||
m_min_time = min_time;
|
||||
m_criterion_params.set_float64("min-time", min_time);
|
||||
return *this;
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// Specify the maximum amount of noise if a measurement supports noise.
|
||||
/// Noise is the relative standard deviation:
|
||||
/// `noise = stdev / mean_time`. @{
|
||||
[[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; }
|
||||
/// `noise = stdev / mean_time`.
|
||||
/// Only applies to `stdrel` stopping criterion. @{
|
||||
[[nodiscard]] nvbench::float64_t get_max_noise() const
|
||||
{
|
||||
return m_criterion_params.get_float64("max-noise");
|
||||
}
|
||||
benchmark_base &set_max_noise(nvbench::float64_t max_noise)
|
||||
{
|
||||
m_max_noise = max_noise;
|
||||
m_criterion_params.set_float64("max-noise", max_noise);
|
||||
return *this;
|
||||
}
|
||||
/// @}
|
||||
@@ -230,6 +239,19 @@ struct benchmark_base
|
||||
}
|
||||
/// @}
|
||||
|
||||
[[nodiscard]] nvbench::criterion_params& get_criterion_params() { return m_criterion_params; }
|
||||
[[nodiscard]] const nvbench::criterion_params& get_criterion_params() const { return m_criterion_params; }
|
||||
|
||||
/// Control the stopping criterion for the measurement loop.
|
||||
/// @{
|
||||
[[nodiscard]] const std::string& get_stopping_criterion() const { return m_stopping_criterion; }
|
||||
benchmark_base &set_stopping_criterion(std::string criterion)
|
||||
{
|
||||
m_stopping_criterion = std::move(criterion);
|
||||
return *this;
|
||||
}
|
||||
/// @}
|
||||
|
||||
protected:
|
||||
friend struct nvbench::runner_base;
|
||||
|
||||
@@ -247,12 +269,13 @@ protected:
|
||||
bool m_disable_blocking_kernel{false};
|
||||
|
||||
nvbench::int64_t m_min_samples{10};
|
||||
nvbench::float64_t m_min_time{0.5};
|
||||
nvbench::float64_t m_max_noise{0.005}; // 0.5% relative standard deviation
|
||||
|
||||
nvbench::float64_t m_skip_time{-1.};
|
||||
nvbench::float64_t m_timeout{15.};
|
||||
|
||||
nvbench::criterion_params m_criterion_params;
|
||||
std::string m_stopping_criterion{"stdrel"};
|
||||
|
||||
private:
|
||||
// route these through virtuals so the templated subclass can inject type info
|
||||
virtual std::unique_ptr<benchmark_base> do_clone() const = 0;
|
||||
|
||||
@@ -34,13 +34,14 @@ std::unique_ptr<benchmark_base> benchmark_base::clone() const
|
||||
result->m_axes = m_axes;
|
||||
result->m_devices = m_devices;
|
||||
|
||||
result->m_min_samples = m_min_samples;
|
||||
result->m_min_time = m_min_time;
|
||||
result->m_max_noise = m_max_noise;
|
||||
result->m_min_samples = m_min_samples;
|
||||
result->m_criterion_params = m_criterion_params;
|
||||
|
||||
result->m_skip_time = m_skip_time;
|
||||
result->m_timeout = m_timeout;
|
||||
|
||||
result->m_stopping_criterion = m_stopping_criterion;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
65
nvbench/criterion_manager.cuh
Normal file
65
nvbench/criterion_manager.cuh
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/detail/entropy_criterion.cuh>
|
||||
#include <nvbench/detail/stdrel_criterion.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
class criterion_manager
|
||||
{
|
||||
std::unordered_map<std::string, std::unique_ptr<nvbench::stopping_criterion_base>> m_map;
|
||||
|
||||
criterion_manager();
|
||||
|
||||
public:
|
||||
/**
|
||||
* @return The singleton criterion_manager instance.
|
||||
*/
|
||||
static criterion_manager& get();
|
||||
|
||||
/**
|
||||
* Register a new stopping criterion.
|
||||
*/
|
||||
nvbench::stopping_criterion_base& add(std::unique_ptr<nvbench::stopping_criterion_base> criterion);
|
||||
nvbench::stopping_criterion_base& get_criterion(const std::string& name);
|
||||
const nvbench::stopping_criterion_base& get_criterion(const std::string& name) const;
|
||||
|
||||
using params_description = std::vector<std::pair<std::string, nvbench::named_values::type>>;
|
||||
params_description get_params_description() const;
|
||||
};
|
||||
|
||||
/**
|
||||
* Given a stopping criterion type `TYPE`, registers it in the criterion manager
|
||||
*
|
||||
* See the `custom_criterion.cu` example for usage.
|
||||
*/
|
||||
#define NVBENCH_REGISTER_CRITERION(TYPE) \
|
||||
static nvbench::stopping_criterion_base &NVBENCH_UNIQUE_IDENTIFIER(TYPE) = \
|
||||
nvbench::criterion_manager::get().add(std::make_unique<TYPE>())
|
||||
|
||||
} // namespace nvbench
|
||||
100
nvbench/criterion_manager.cxx
Normal file
100
nvbench/criterion_manager.cxx
Normal file
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/criterion_manager.cuh>
#include <nvbench/detail/throw.cuh>

#include <algorithm>
#include <stdexcept>
#include <string>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
// Populates the registry with the built-in "stdrel" and "entropy" criteria.
criterion_manager::criterion_manager()
{
  auto stdrel  = std::make_unique<nvbench::detail::stdrel_criterion>();
  auto entropy = std::make_unique<nvbench::detail::entropy_criterion>();

  m_map.emplace("stdrel", std::move(stdrel));
  m_map.emplace("entropy", std::move(entropy));
}
|
||||
|
||||
// Returns the process-wide registry, held in a function-local static.
criterion_manager &criterion_manager::get()
{
  static criterion_manager instance;
  return instance;
}
|
||||
|
||||
// Looks up the criterion registered under `name`; reports a
// std::runtime_error through NVBENCH_THROW when there is none.
stopping_criterion_base &criterion_manager::get_criterion(const std::string &name)
{
  const auto entry = m_map.find(name);

  const bool is_registered = entry != m_map.end();
  if (!is_registered)
  {
    NVBENCH_THROW(std::runtime_error, "No stopping criterion named \"{}\".", name);
  }

  return *entry->second;
}
|
||||
|
||||
// Const overload: looks up the criterion registered under `name`; reports a
// std::runtime_error through NVBENCH_THROW when there is none.
const nvbench::stopping_criterion_base &criterion_manager::get_criterion(const std::string &name) const
{
  const auto entry = m_map.find(name);

  const bool is_registered = entry != m_map.end();
  if (!is_registered)
  {
    NVBENCH_THROW(std::runtime_error, "No stopping criterion named \"{}\".", name);
  }

  return *entry->second;
}
|
||||
|
||||
// Registers `criterion` under the name it reports via `get_name()` and returns
// a reference to the stored object. Reports a std::runtime_error through
// NVBENCH_THROW when that name is already present in the registry.
stopping_criterion_base &criterion_manager::add(std::unique_ptr<stopping_criterion_base> criterion)
{
  // Copy the name out first: `criterion` is moved from on the next line.
  const std::string name = criterion->get_name();

  const auto insertion = m_map.emplace(name, std::move(criterion));
  const bool inserted  = insertion.second;
  if (!inserted)
  {
    NVBENCH_THROW(std::runtime_error,
                  "Stopping criterion \"{}\" is already registered.", name);
  }

  return *insertion.first->second;
}
|
||||
|
||||
// Collects the (name, type) pair of every parameter of every registered
// criterion.
//
// A parameter name may be shared between criteria only if the type matches;
// otherwise a std::runtime_error is reported through NVBENCH_THROW. A shared
// parameter is listed once per criterion that declares it. Entries follow the
// iteration order of the underlying unordered_map.
nvbench::criterion_manager::params_description criterion_manager::get_params_description() const
{
  nvbench::criterion_manager::params_description desc;

  for (const auto &[criterion_name, criterion] : m_map)
  {
    nvbench::criterion_params params = criterion->get_params();

    for (const auto &param : params.get_names())
    {
      const nvbench::named_values::type type = params.get_type(param);

      // Compare against the entries gathered so far by reference; copying each
      // (string, type) pair per comparison is unnecessary.
      const bool type_conflict =
        std::any_of(desc.cbegin(), desc.cend(), [&param, &type](const auto &known) {
          return known.first == param && known.second != type;
        });

      if (type_conflict)
      {
        NVBENCH_THROW(std::runtime_error,
                      "Stopping criterion \"{}\" parameter \"{}\" is already used by another "
                      "criterion with a different type.",
                      criterion_name,
                      param);
      }

      desc.emplace_back(param, type);
    }
  }

  return desc;
}
|
||||
|
||||
} // namespace nvbench
|
||||
55
nvbench/detail/entropy_criterion.cuh
Normal file
55
nvbench/detail/entropy_criterion.cuh
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
#include <nvbench/detail/ring_buffer.cuh>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace nvbench::detail
|
||||
{
|
||||
|
||||
class entropy_criterion final : public stopping_criterion_base
|
||||
{
|
||||
// state
|
||||
nvbench::int64_t m_total_samples{};
|
||||
nvbench::float64_t m_total_cuda_time{};
|
||||
std::vector<std::pair<nvbench::float64_t, nvbench::int64_t>> m_freq_tracker;
|
||||
|
||||
// TODO The window size should be user-configurable
|
||||
nvbench::detail::ring_buffer<nvbench::float64_t> m_entropy_tracker{299};
|
||||
|
||||
// Used to avoid re-allocating temporary memory
|
||||
std::vector<nvbench::float64_t> m_probabilities;
|
||||
|
||||
nvbench::float64_t compute_entropy();
|
||||
|
||||
public:
|
||||
entropy_criterion();
|
||||
|
||||
protected:
|
||||
virtual void do_initialize() override;
|
||||
virtual void do_add_measurement(nvbench::float64_t measurement) override;
|
||||
virtual bool do_is_finished() override;
|
||||
|
||||
};
|
||||
|
||||
} // namespace nvbench::detail
|
||||
137
nvbench/detail/entropy_criterion.cxx
Normal file
137
nvbench/detail/entropy_criterion.cxx
Normal file
@@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/detail/entropy_criterion.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <cmath>
|
||||
|
||||
|
||||
namespace nvbench::detail
|
||||
{
|
||||
|
||||
entropy_criterion::entropy_criterion()
|
||||
: stopping_criterion_base{"entropy", {{"max-angle", 0.048}, {"min-r2", 0.36}}}
|
||||
{
|
||||
m_freq_tracker.reserve(m_entropy_tracker.capacity() * 2);
|
||||
m_probabilities.reserve(m_entropy_tracker.capacity() * 2);
|
||||
}
|
||||
|
||||
void entropy_criterion::do_initialize()
|
||||
{
|
||||
m_total_samples = 0;
|
||||
m_total_cuda_time = 0.0;
|
||||
m_entropy_tracker.clear();
|
||||
m_freq_tracker.clear();
|
||||
}
|
||||
|
||||
// Computes -sum(p * log2(p)) over the tracked values, where p is a value's
// occurrence count divided by the total number of samples. Returns 0 when no
// value has been tracked yet.
nvbench::float64_t entropy_criterion::compute_entropy()
{
  const std::size_t num_distinct = m_freq_tracker.size();
  if (num_distinct == 0)
  {
    return 0.0;
  }

  // First pass: turn the occurrence counts into probabilities.
  const auto total = static_cast<nvbench::float64_t>(m_total_samples);

  m_probabilities.resize(num_distinct);
  for (std::size_t idx = 0; idx < num_distinct; ++idx)
  {
    const auto count     = static_cast<nvbench::float64_t>(m_freq_tracker[idx].second);
    m_probabilities[idx] = count / total;
  }

  // Second pass: accumulate the entropy in bits.
  nvbench::float64_t result{};
  for (const nvbench::float64_t probability : m_probabilities)
  {
    result -= probability * std::log2(probability);
  }

  return result;
}
|
||||
|
||||
// Records one measurement: updates the running totals, bumps the occurrence
// count of the measured value (inserting it in sorted position if it is new)
// and appends the resulting entropy to the entropy window.
void entropy_criterion::do_add_measurement(nvbench::float64_t measurement)
{
  ++m_total_samples;
  m_total_cuda_time += measurement;

  nvbench::float64_t key = measurement;

  // Optional rounding of the key to a fixed resolution; currently disabled.
  constexpr bool bin_keys = false;
  if constexpr (bin_keys)
  {
    const auto resolution_us = 0.5;
    const auto resolution_s  = resolution_us / 1'000'000;
    const auto epsilon       = resolution_s * 2;

    key = std::round(key / epsilon) * epsilon;
  }

  // This approach is about 3x faster than `std::{unordered_,}map`
  // Up to 100k samples, only about 20% slower than corresponding stdrel method
  const auto probe = std::make_pair(key, nvbench::int64_t{});
  const auto slot  = std::lower_bound(m_freq_tracker.begin(), m_freq_tracker.end(), probe);

  const bool already_tracked = slot != m_freq_tracker.end() && slot->first == key;
  if (already_tracked)
  {
    slot->second += 1;
  }
  else
  {
    m_freq_tracker.insert(slot, std::make_pair(key, nvbench::int64_t{1}));
  }

  m_entropy_tracker.push_back(compute_entropy());
}
|
||||
|
||||
bool entropy_criterion::do_is_finished()
|
||||
{
|
||||
if (m_entropy_tracker.size() < 2)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Even number of samples is used to reduce the overhead and not required to compute entropy.
|
||||
// This makes `is_finished()` about 20% faster than corresponding stdrel method.
|
||||
if (m_total_samples % 2 != 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
auto begin = m_entropy_tracker.cbegin();
|
||||
auto end = m_entropy_tracker.cend();
|
||||
auto mean = statistics::compute_mean(begin, end);
|
||||
|
||||
const auto [slope, intercept] = statistics::compute_linear_regression(begin, end, mean);
|
||||
|
||||
if (statistics::slope2deg(slope) > m_params.get_float64("max-angle"))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto r2 = statistics::compute_r2(begin, end, mean, slope, intercept);
|
||||
if (r2 < m_params.get_float64("min-r2"))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace nvbench::detail
|
||||
@@ -16,38 +16,37 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/detail/measure_cold.cuh>
|
||||
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/criterion_manager.cuh>
|
||||
#include <nvbench/detail/measure_cold.cuh>
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/printer_base.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
|
||||
#include <nvbench/detail/ring_buffer.cuh>
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <stdexcept>
|
||||
#include <variant>
|
||||
|
||||
namespace nvbench::detail
|
||||
{
|
||||
|
||||
measure_cold_base::measure_cold_base(state &exec_state)
|
||||
: m_state{exec_state}
|
||||
, m_launch{m_state.get_cuda_stream()}
|
||||
, m_criterion_params{exec_state.get_criterion_params()}
|
||||
, m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(exec_state.get_stopping_criterion())}
|
||||
, m_run_once{exec_state.get_run_once()}
|
||||
, m_no_block{exec_state.get_disable_blocking_kernel()}
|
||||
, m_min_samples{exec_state.get_min_samples()}
|
||||
, m_max_noise{exec_state.get_max_noise()}
|
||||
, m_min_time{exec_state.get_min_time()}
|
||||
, m_skip_time{exec_state.get_skip_time()}
|
||||
, m_timeout{exec_state.get_timeout()}
|
||||
{}
|
||||
{
|
||||
if (m_min_samples > 0)
|
||||
{
|
||||
m_cuda_times.reserve(m_min_samples);
|
||||
m_cpu_times.reserve(m_min_samples);
|
||||
}
|
||||
}
|
||||
|
||||
void measure_cold_base::check()
|
||||
{
|
||||
@@ -68,10 +67,11 @@ void measure_cold_base::initialize()
|
||||
m_total_cpu_time = 0.;
|
||||
m_cpu_noise = 0.;
|
||||
m_total_samples = 0;
|
||||
m_noise_tracker.clear();
|
||||
m_cuda_times.clear();
|
||||
m_cpu_times.clear();
|
||||
m_max_time_exceeded = false;
|
||||
|
||||
m_stopping_criterion.initialize(m_criterion_params);
|
||||
}
|
||||
|
||||
void measure_cold_base::run_trials_prologue() { m_walltime_timer.start(); }
|
||||
@@ -87,16 +87,7 @@ void measure_cold_base::record_measurements()
|
||||
m_total_cpu_time += cur_cpu_time;
|
||||
++m_total_samples;
|
||||
|
||||
// Compute convergence statistics using CUDA timings:
|
||||
const auto mean_cuda_time = m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples);
|
||||
const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
|
||||
m_cuda_times.cend(),
|
||||
mean_cuda_time);
|
||||
auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
|
||||
if (std::isfinite(cuda_rel_stdev))
|
||||
{
|
||||
m_noise_tracker.push_back(cuda_rel_stdev);
|
||||
}
|
||||
m_stopping_criterion.add_measurement(cur_cuda_time);
|
||||
}
|
||||
|
||||
bool measure_cold_base::is_finished()
|
||||
@@ -107,39 +98,12 @@ bool measure_cold_base::is_finished()
|
||||
}
|
||||
|
||||
// Check that we've gathered enough samples:
|
||||
if (m_total_cuda_time > m_min_time && m_total_samples > m_min_samples)
|
||||
if (m_total_samples > m_min_samples)
|
||||
{
|
||||
// Noise has dropped below threshold
|
||||
if (m_noise_tracker.back() < m_max_noise)
|
||||
if (m_stopping_criterion.is_finished())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if the noise (cuda rel stdev) has converged by inspecting a
|
||||
// trailing window of recorded noise measurements.
|
||||
// This helps identify benchmarks that are inherently noisy and would
|
||||
// never converge to the target stdev threshold. This check ensures that the
|
||||
// benchmark will end if the stdev stabilizes above the target threshold.
|
||||
// Gather some iterations before checking noise, and limit how often we
|
||||
// check this.
|
||||
if (m_noise_tracker.size() > 64 && (m_total_samples % 16 == 0))
|
||||
{
|
||||
// Use the current noise as the stdev reference.
|
||||
const auto current_noise = m_noise_tracker.back();
|
||||
const auto noise_stdev =
|
||||
nvbench::detail::statistics::standard_deviation(m_noise_tracker.cbegin(),
|
||||
m_noise_tracker.cend(),
|
||||
current_noise);
|
||||
const auto noise_rel_stdev = noise_stdev / current_noise;
|
||||
|
||||
// If the rel stdev of the last N cuda noise measurements is less than
|
||||
// 5%, consider the result stable.
|
||||
const auto noise_threshold = 0.05;
|
||||
if (noise_rel_stdev < noise_threshold)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for timeouts:
|
||||
@@ -206,14 +170,21 @@ void measure_cold_base::generate_summaries()
|
||||
summ.set_float64("value", avg_cuda_time);
|
||||
}
|
||||
|
||||
const auto mean_cuda_time = m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples);
|
||||
const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
|
||||
m_cuda_times.cend(),
|
||||
mean_cuda_time);
|
||||
const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
|
||||
const auto noise = cuda_rel_stdev;
|
||||
const auto max_noise = m_criterion_params.get_float64("max-noise");
|
||||
const auto min_time = m_criterion_params.get_float64("min-time");
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/stdev/relative");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("description", "Relative standard deviation of isolated GPU times");
|
||||
summ.set_float64("value",
|
||||
m_noise_tracker.empty() ? std::numeric_limits<nvbench::float64_t>::infinity()
|
||||
: m_noise_tracker.back());
|
||||
summ.set_float64("value", noise);
|
||||
}
|
||||
|
||||
if (const auto items = m_state.get_element_count(); items != 0)
|
||||
@@ -270,15 +241,15 @@ void measure_cold_base::generate_summaries()
|
||||
{
|
||||
const auto timeout = m_walltime_timer.get_duration();
|
||||
|
||||
if (!m_noise_tracker.empty() && m_noise_tracker.back() > m_max_noise)
|
||||
if (noise > max_noise)
|
||||
{
|
||||
printer.log(nvbench::log_level::warn,
|
||||
fmt::format("Current measurement timed out ({:0.2f}s) "
|
||||
"while over noise threshold ({:0.2f}% > "
|
||||
"{:0.2f}%)",
|
||||
timeout,
|
||||
m_noise_tracker.back() * 100,
|
||||
m_max_noise * 100));
|
||||
noise * 100,
|
||||
max_noise * 100));
|
||||
}
|
||||
if (m_total_samples < m_min_samples)
|
||||
{
|
||||
@@ -289,7 +260,7 @@ void measure_cold_base::generate_summaries()
|
||||
m_total_samples,
|
||||
m_min_samples));
|
||||
}
|
||||
if (m_total_cuda_time < m_min_time)
|
||||
if (m_total_cuda_time < min_time)
|
||||
{
|
||||
printer.log(nvbench::log_level::warn,
|
||||
fmt::format("Current measurement timed out ({:0.2f}s) "
|
||||
@@ -297,7 +268,7 @@ void measure_cold_base::generate_summaries()
|
||||
"{:0.2f}s)",
|
||||
timeout,
|
||||
m_total_cuda_time,
|
||||
m_min_time));
|
||||
min_time));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,15 +25,14 @@
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/exec_tag.cuh>
|
||||
#include <nvbench/launch.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
|
||||
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
|
||||
#include <nvbench/detail/l2flush.cuh>
|
||||
#include <nvbench/detail/ring_buffer.cuh>
|
||||
#include <nvbench/detail/statistics.cuh>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -87,12 +86,13 @@ protected:
|
||||
nvbench::detail::l2flush m_l2flush;
|
||||
nvbench::blocking_kernel m_blocker;
|
||||
|
||||
nvbench::criterion_params m_criterion_params;
|
||||
nvbench::stopping_criterion_base& m_stopping_criterion;
|
||||
|
||||
bool m_run_once{false};
|
||||
bool m_no_block{false};
|
||||
|
||||
nvbench::int64_t m_min_samples{};
|
||||
nvbench::float64_t m_max_noise{}; // rel stdev
|
||||
nvbench::float64_t m_min_time{};
|
||||
|
||||
nvbench::float64_t m_skip_time{};
|
||||
nvbench::float64_t m_timeout{};
|
||||
@@ -102,9 +102,6 @@ protected:
|
||||
nvbench::float64_t m_total_cpu_time{};
|
||||
nvbench::float64_t m_cpu_noise{}; // rel stdev
|
||||
|
||||
// Trailing history of noise measurements for convergence tests
|
||||
nvbench::detail::ring_buffer<nvbench::float64_t> m_noise_tracker{512};
|
||||
|
||||
std::vector<nvbench::float64_t> m_cuda_times;
|
||||
std::vector<nvbench::float64_t> m_cpu_times;
|
||||
|
||||
|
||||
@@ -22,12 +22,91 @@
|
||||
|
||||
#include <nvbench/detail/statistics.cuh>
|
||||
|
||||
#include <cstddef>
|
||||
#include <iterator>
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
|
||||
namespace nvbench::detail
|
||||
{
|
||||
|
||||
/**
 * @brief Random-access iterator over the storage backing a ring buffer.
 *
 * The iterator stores a logical index together with the buffer capacity and a
 * pointer to the first storage slot. The index is wrapped (modulo the
 * capacity) only when the iterator is dereferenced, so it may exceed the
 * capacity while a range is being traversed.
 */
template <class T>
class ring_buffer_iterator
{
  std::ptrdiff_t m_index;
  std::ptrdiff_t m_capacity;
  T *m_ptr;

public:
  using iterator_category = std::random_access_iterator_tag;
  using value_type        = T;
  using difference_type   = std::ptrdiff_t;
  using pointer           = T *;
  using reference         = T &;

  /**
   * @param index Logical (unwrapped) position of the iterator
   * @param capacity Number of slots in the underlying storage
   * @param ptr Pointer to the first slot of the underlying storage
   */
  ring_buffer_iterator(std::ptrdiff_t index, std::ptrdiff_t capacity, pointer ptr)
      : m_index{index}
      , m_capacity{capacity}
      , m_ptr{ptr}
  {}

  // Pre-increment returns a reference to *this, matching operator--().
  ring_buffer_iterator &operator++()
  {
    ++m_index;
    return *this;
  }

  ring_buffer_iterator operator++(int)
  {
    ring_buffer_iterator temp = *this;
    ++(*this);
    return temp;
  }

  ring_buffer_iterator &operator--()
  {
    --m_index;
    return *this;
  }

  ring_buffer_iterator operator--(int)
  {
    ring_buffer_iterator temp = *this;
    --(*this);
    return temp;
  }

  // Compound assignment is required by std::advance / std::next for
  // iterators tagged as random access.
  ring_buffer_iterator &operator+=(difference_type n)
  {
    m_index += n;
    return *this;
  }

  ring_buffer_iterator &operator-=(difference_type n)
  {
    m_index -= n;
    return *this;
  }

  ring_buffer_iterator operator+(difference_type n) const
  {
    return ring_buffer_iterator(m_index + n, m_capacity, m_ptr);
  }

  ring_buffer_iterator operator-(difference_type n) const
  {
    return ring_buffer_iterator(m_index - n, m_capacity, m_ptr);
  }

  // Distance in logical positions; the storage pointers are not compared.
  difference_type operator-(const ring_buffer_iterator &other) const
  {
    return m_index - other.m_index;
  }

  // The logical index is wrapped into the storage only here.
  reference operator*() const { return m_ptr[m_index % m_capacity]; }
  pointer operator->() const { return &(operator*()); }

  reference operator[](difference_type n) const { return *(*this + n); }

  bool operator==(const ring_buffer_iterator &other) const
  {
    return m_ptr == other.m_ptr && m_index == other.m_index;
  }
  bool operator!=(const ring_buffer_iterator &other) const { return !(*this == other); }
  bool operator<(const ring_buffer_iterator &other) const { return m_index < other.m_index; }
  bool operator>(const ring_buffer_iterator &other) const { return m_index > other.m_index; }
  bool operator<=(const ring_buffer_iterator &other) const { return !(*this > other); }
  bool operator>=(const ring_buffer_iterator &other) const { return !(*this < other); }
};
|
||||
|
||||
/**
|
||||
* @brief A simple, dynamically sized ring buffer.
|
||||
*/
|
||||
@@ -42,7 +121,13 @@ private:
|
||||
std::size_t m_index{0};
|
||||
bool m_full{false};
|
||||
|
||||
std::size_t get_front_index() const
|
||||
{
|
||||
return m_full ? m_index : 0;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Create a new ring buffer with the requested capacity.
|
||||
*/
|
||||
@@ -51,17 +136,48 @@ public:
|
||||
{}
|
||||
|
||||
/**
|
||||
* Iterators provide all values in the ring buffer in unspecified order.
|
||||
* Iterators provide all values in the ring buffer in FIFO order.
|
||||
* @{
|
||||
*/
|
||||
// clang-format off
|
||||
[[nodiscard]] auto begin() { return m_buffer.begin(); }
|
||||
[[nodiscard]] auto begin() const { return m_buffer.begin(); }
|
||||
[[nodiscard]] auto cbegin() const { return m_buffer.cbegin(); }
|
||||
[[nodiscard]] auto end() { return m_buffer.begin() + static_cast<diff_t>(this->size()); }
|
||||
[[nodiscard]] auto end() const { return m_buffer.begin() + static_cast<diff_t>(this->size()); }
|
||||
[[nodiscard]] auto cend() const { return m_buffer.cbegin() + static_cast<diff_t>(this->size()); }
|
||||
// clang-format on
|
||||
// Mutable iterator positioned at the front index of the buffer.
[[nodiscard]] ring_buffer_iterator<T> begin()
{
  const auto front_pos = static_cast<std::ptrdiff_t>(get_front_index());
  const auto slots     = static_cast<std::ptrdiff_t>(capacity());
  return ring_buffer_iterator<T>{front_pos, slots, m_buffer.data()};
}
|
||||
|
||||
// Mutable iterator positioned `size()` elements past the front index.
[[nodiscard]] ring_buffer_iterator<T> end()
{
  const auto past_last = static_cast<std::ptrdiff_t>(get_front_index() + size());
  const auto slots     = static_cast<std::ptrdiff_t>(capacity());
  return ring_buffer_iterator<T>{past_last, slots, m_buffer.data()};
}
|
||||
// Read-only iterator positioned at the front index of the buffer.
[[nodiscard]] ring_buffer_iterator<const T> begin() const
{
  const auto front_pos = static_cast<std::ptrdiff_t>(get_front_index());
  const auto slots     = static_cast<std::ptrdiff_t>(capacity());
  return ring_buffer_iterator<const T>{front_pos, slots, m_buffer.data()};
}
|
||||
|
||||
// Read-only iterator positioned `size()` elements past the front index.
[[nodiscard]] ring_buffer_iterator<const T> end() const
{
  const auto past_last = static_cast<std::ptrdiff_t>(get_front_index() + size());
  const auto slots     = static_cast<std::ptrdiff_t>(capacity());
  return ring_buffer_iterator<const T>{past_last, slots, m_buffer.data()};
}
|
||||
// Read-only iterator positioned at the front index of the buffer.
[[nodiscard]] ring_buffer_iterator<const T> cbegin() const
{
  const auto front_pos = static_cast<std::ptrdiff_t>(get_front_index());
  const auto slots     = static_cast<std::ptrdiff_t>(capacity());
  return ring_buffer_iterator<const T>{front_pos, slots, m_buffer.data()};
}
|
||||
|
||||
// Read-only iterator positioned `size()` elements past the front index.
[[nodiscard]] ring_buffer_iterator<const T> cend() const
{
  const auto past_last = static_cast<std::ptrdiff_t>(get_front_index() + size());
  const auto slots     = static_cast<std::ptrdiff_t>(capacity());
  return ring_buffer_iterator<const T>{past_last, slots, m_buffer.data()};
}
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
|
||||
@@ -18,14 +18,15 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <nvbench/detail/transform_reduce.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <cmath>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace nvbench::detail::statistics
|
||||
@@ -41,7 +42,8 @@ ValueType standard_deviation(Iter first, Iter last, ValueType mean)
|
||||
{
|
||||
static_assert(std::is_floating_point_v<ValueType>);
|
||||
|
||||
const auto num = last - first;
|
||||
const auto num = std::distance(first, last);
|
||||
|
||||
if (num < 5) // don't bother with low sample sizes.
|
||||
{
|
||||
return std::numeric_limits<ValueType>::infinity();
|
||||
@@ -56,8 +58,135 @@ ValueType standard_deviation(Iter first, Iter last, ValueType mean)
|
||||
val *= val;
|
||||
return val;
|
||||
}) /
|
||||
static_cast<ValueType>((num - 1));
|
||||
static_cast<ValueType>((num - 1)); // Bessel's correction
|
||||
return std::sqrt(variance);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes and returns the mean.
|
||||
*
|
||||
* If the input has fewer than 1 sample, infinity is returned.
|
||||
*/
|
||||
template <class It>
|
||||
nvbench::float64_t compute_mean(It first, It last)
|
||||
{
|
||||
const auto num = std::distance(first, last);
|
||||
|
||||
if (num < 1)
|
||||
{
|
||||
return std::numeric_limits<nvbench::float64_t>::infinity();
|
||||
}
|
||||
|
||||
return std::accumulate(first, last, 0.0) / static_cast<nvbench::float64_t>(num);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes linear regression and returns the slope and intercept
|
||||
*
|
||||
* This version takes precomputed mean of [first, last).
|
||||
* If the input has fewer than 2 samples, infinity is returned for both slope and intercept.
|
||||
*/
|
||||
template <class It>
|
||||
std::pair<nvbench::float64_t, nvbench::float64_t>
|
||||
compute_linear_regression(It first, It last, nvbench::float64_t mean_y)
|
||||
{
|
||||
const std::size_t n = static_cast<std::size_t>(std::distance(first, last));
|
||||
|
||||
if (n < 2)
|
||||
{
|
||||
return std::make_pair(std::numeric_limits<nvbench::float64_t>::infinity(),
|
||||
std::numeric_limits<nvbench::float64_t>::infinity());
|
||||
}
|
||||
|
||||
// Assuming x starts from 0
|
||||
const nvbench::float64_t mean_x = (static_cast<nvbench::float64_t>(n) - 1.0) / 2.0;
|
||||
|
||||
// Calculate the numerator and denominator for the slope
|
||||
nvbench::float64_t numerator = 0.0;
|
||||
nvbench::float64_t denominator = 0.0;
|
||||
|
||||
for (std::size_t i = 0; i < n; ++i, ++first)
|
||||
{
|
||||
const nvbench::float64_t x_diff = static_cast<nvbench::float64_t>(i) - mean_x;
|
||||
numerator += x_diff * (*first - mean_y);
|
||||
denominator += x_diff * x_diff;
|
||||
}
|
||||
|
||||
// Calculate the slope and intercept
|
||||
const nvbench::float64_t slope = numerator / denominator;
|
||||
const nvbench::float64_t intercept = mean_y - slope * mean_x;
|
||||
|
||||
return std::make_pair(slope, intercept);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes linear regression and returns the slope and intercept
|
||||
*
|
||||
* If the input has fewer than 2 samples, infinity is returned for both slope and intercept.
|
||||
*/
|
||||
template <class It>
|
||||
std::pair<nvbench::float64_t, nvbench::float64_t> compute_linear_regression(It first, It last)
|
||||
{
|
||||
return compute_linear_regression(first, last, compute_mean(first, last));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes and returns the R^2 (coefficient of determination)
|
||||
*
|
||||
* This version takes precomputed mean of [first, last).
|
||||
*/
|
||||
template <class It>
|
||||
nvbench::float64_t compute_r2(It first,
|
||||
It last,
|
||||
nvbench::float64_t mean_y,
|
||||
nvbench::float64_t slope,
|
||||
nvbench::float64_t intercept)
|
||||
{
|
||||
const std::size_t n = static_cast<std::size_t>(std::distance(first, last));
|
||||
|
||||
nvbench::float64_t ss_tot = 0.0;
|
||||
nvbench::float64_t ss_res = 0.0;
|
||||
|
||||
for (std::size_t i = 0; i < n; ++i, ++first)
|
||||
{
|
||||
const nvbench::float64_t y = *first;
|
||||
const nvbench::float64_t y_pred = slope * static_cast<nvbench::float64_t>(i) + intercept;
|
||||
|
||||
ss_tot += (y - mean_y) * (y - mean_y);
|
||||
ss_res += (y - y_pred) * (y - y_pred);
|
||||
}
|
||||
|
||||
if (ss_tot == 0.0)
|
||||
{
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
return 1.0 - ss_res / ss_tot;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes and returns the R^2 (coefficient of determination)
|
||||
*/
|
||||
template <class It>
|
||||
nvbench::float64_t
|
||||
compute_r2(It first, It last, nvbench::float64_t slope, nvbench::float64_t intercept)
|
||||
{
|
||||
return compute_r2(first, last, compute_mean(first, last), slope, intercept);
|
||||
}
|
||||
|
||||
/**
 * Converts an angle from radians to degrees.
 */
inline nvbench::float64_t rad2deg(nvbench::float64_t rad)
{
  // `M_PI` is not part of ISO C++ (MSVC only defines it when
  // `_USE_MATH_DEFINES` is set), so the constant is spelled out here.
  constexpr nvbench::float64_t pi = 3.14159265358979323846;
  return rad * 180.0 / pi;
}
|
||||
|
||||
/**
 * Converts a slope to the angle, in radians, of a line with that slope.
 */
inline nvbench::float64_t slope2rad(nvbench::float64_t slope)
{
  // Angle of the direction vector (run = 1, rise = slope).
  constexpr nvbench::float64_t run = 1.0;
  return std::atan2(slope, run);
}
|
||||
|
||||
/**
 * Converts a slope to the angle, in degrees, of a line with that slope.
 */
inline nvbench::float64_t slope2deg(nvbench::float64_t slope)
{
  const nvbench::float64_t angle_rad = slope2rad(slope);
  return rad2deg(angle_rad);
}
|
||||
|
||||
} // namespace nvbench::detail::statistics
|
||||
|
||||
47
nvbench/detail/stdrel_criterion.cuh
Normal file
47
nvbench/detail/stdrel_criterion.cuh
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
#include <nvbench/detail/ring_buffer.cuh>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace nvbench::detail
|
||||
{
|
||||
|
||||
class stdrel_criterion final : public stopping_criterion_base
|
||||
{
|
||||
// state
|
||||
nvbench::int64_t m_total_samples{};
|
||||
nvbench::float64_t m_total_cuda_time{};
|
||||
std::vector<nvbench::float64_t> m_cuda_times{};
|
||||
nvbench::detail::ring_buffer<nvbench::float64_t> m_noise_tracker{512};
|
||||
|
||||
public:
|
||||
stdrel_criterion();
|
||||
|
||||
protected:
|
||||
virtual void do_initialize() override;
|
||||
virtual void do_add_measurement(nvbench::float64_t measurement) override;
|
||||
virtual bool do_is_finished() override;
|
||||
};
|
||||
|
||||
} // namespace nvbench::detail
|
||||
98
nvbench/detail/stdrel_criterion.cxx
Normal file
98
nvbench/detail/stdrel_criterion.cxx
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/detail/stdrel_criterion.cuh>
|
||||
|
||||
namespace nvbench::detail
|
||||
{
|
||||
|
||||
stdrel_criterion::stdrel_criterion()
|
||||
: stopping_criterion_base{"stdrel",
|
||||
{{"max-noise", nvbench::detail::compat_max_noise()},
|
||||
{"min-time", nvbench::detail::compat_min_time()}}}
|
||||
{}
|
||||
|
||||
void stdrel_criterion::do_initialize()
|
||||
{
|
||||
m_total_samples = 0;
|
||||
m_total_cuda_time = 0.0;
|
||||
m_cuda_times.clear();
|
||||
m_noise_tracker.clear();
|
||||
}
|
||||
|
||||
// Records one measurement and, when the resulting relative standard deviation
// is finite, appends it to the noise history.
void stdrel_criterion::do_add_measurement(nvbench::float64_t measurement)
{
  namespace stats = nvbench::detail::statistics;

  ++m_total_samples;
  m_total_cuda_time += measurement;
  m_cuda_times.push_back(measurement);

  // Noise = stdev / mean over every measurement seen so far.
  const auto sample_count = static_cast<nvbench::float64_t>(m_total_samples);
  const auto mean         = m_total_cuda_time / sample_count;
  const auto stdev =
    stats::standard_deviation(m_cuda_times.cbegin(), m_cuda_times.cend(), mean);
  const auto rel_stdev = stdev / mean;

  // Non-finite values are not stored in the noise history.
  if (!std::isfinite(rel_stdev))
  {
    return;
  }
  m_noise_tracker.push_back(rel_stdev);
}
|
||||
|
||||
bool stdrel_criterion::do_is_finished()
|
||||
{
|
||||
if (m_total_cuda_time <= m_params.get_float64("min-time"))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Noise has dropped below threshold
|
||||
if (m_noise_tracker.back() < m_params.get_float64("max-noise"))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if the noise (cuda rel stdev) has converged by inspecting a
|
||||
// trailing window of recorded noise measurements.
|
||||
// This helps identify benchmarks that are inherently noisy and would
|
||||
// never converge to the target stdev threshold. This check ensures that the
|
||||
// benchmark will end if the stdev stabilizes above the target threshold.
|
||||
// Gather some iterations before checking noise, and limit how often we
|
||||
// check this.
|
||||
if (m_noise_tracker.size() > 64 && (m_total_samples % 16 == 0))
|
||||
{
|
||||
// Use the current noise as the stdev reference.
|
||||
const auto current_noise = m_noise_tracker.back();
|
||||
const auto noise_stdev =
|
||||
nvbench::detail::statistics::standard_deviation(m_noise_tracker.cbegin(),
|
||||
m_noise_tracker.cend(),
|
||||
current_noise);
|
||||
const auto noise_rel_stdev = noise_stdev / current_noise;
|
||||
|
||||
// If the rel stdev of the last N cuda noise measurements is less than
|
||||
// 5%, consider the result stable.
|
||||
const auto noise_threshold = 0.05;
|
||||
if (noise_rel_stdev < noise_threshold)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace nvbench::detail
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <nvbench/callable.cuh>
|
||||
#include <nvbench/config.cuh>
|
||||
#include <nvbench/cpu_timer.cuh>
|
||||
#include <nvbench/criterion_manager.cuh>
|
||||
#include <nvbench/create.cuh>
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/cuda_stream.cuh>
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/benchmark_manager.cuh>
|
||||
#include <nvbench/csv_printer.cuh>
|
||||
#include <nvbench/criterion_manager.cuh>
|
||||
#include <nvbench/git_revision.cuh>
|
||||
#include <nvbench/json_printer.cuh>
|
||||
#include <nvbench/markdown_printer.cuh>
|
||||
@@ -376,6 +377,9 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
|
||||
}
|
||||
};
|
||||
|
||||
const nvbench::criterion_manager::params_description criterion_params =
|
||||
nvbench::criterion_manager::get().get_params_description();
|
||||
|
||||
while (first < last)
|
||||
{
|
||||
const auto &arg = *first;
|
||||
@@ -433,6 +437,12 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
|
||||
this->enable_run_once();
|
||||
first += 1;
|
||||
}
|
||||
else if (arg == "--stopping-criterion")
|
||||
{
|
||||
check_params(1);
|
||||
this->set_stopping_criterion(first[1]);
|
||||
first += 2;
|
||||
}
|
||||
else if (arg == "--disable-blocking-kernel")
|
||||
{
|
||||
this->disable_blocking_kernel();
|
||||
@@ -504,16 +514,34 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
|
||||
this->update_int64_prop(first[0], first[1]);
|
||||
first += 2;
|
||||
}
|
||||
else if (arg == "--min-time" || arg == "--max-noise" || arg == "--skip-time" ||
|
||||
arg == "--timeout")
|
||||
else if (arg == "--skip-time" || arg == "--timeout")
|
||||
{
|
||||
check_params(1);
|
||||
this->update_float64_prop(first[0], first[1]);
|
||||
first += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg);
|
||||
{ // Try criterion params
|
||||
if (arg.size() < 3 || arg[0] != '-' || arg[1] != '-')
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg);
|
||||
}
|
||||
|
||||
std::string_view name(arg.c_str() + 2, arg.size() - 2);
|
||||
auto it = std::find_if(criterion_params.begin(),
|
||||
criterion_params.end(),
|
||||
[&name](const auto ¶m) { return param.first == name; });
|
||||
|
||||
if (it != criterion_params.end())
|
||||
{
|
||||
check_params(1);
|
||||
this->update_criterion_prop(first[0], first[1], it->second);
|
||||
first += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -698,6 +726,20 @@ void option_parser::enable_run_once()
|
||||
bench.set_run_once(true);
|
||||
}
|
||||
|
||||
void option_parser::set_stopping_criterion(const std::string &criterion)
|
||||
{
|
||||
// If no active benchmark, save args as global.
|
||||
if (m_benchmarks.empty())
|
||||
{
|
||||
m_global_benchmark_args.push_back("--stopping-criterion");
|
||||
m_global_benchmark_args.push_back(criterion);
|
||||
return;
|
||||
}
|
||||
|
||||
benchmark_base &bench = *m_benchmarks.back();
|
||||
bench.set_stopping_criterion(criterion);
|
||||
}
|
||||
|
||||
void option_parser::disable_blocking_kernel()
|
||||
{
|
||||
// If no active benchmark, save args as global.
|
||||
@@ -933,6 +975,58 @@ catch (std::exception &e)
|
||||
e.what());
|
||||
}
|
||||
|
||||
void option_parser::update_criterion_prop(
|
||||
const std::string &prop_arg,
|
||||
const std::string &prop_val,
|
||||
const nvbench::named_values::type type)
|
||||
try
|
||||
{
|
||||
// If no active benchmark, save args as global.
|
||||
if (m_benchmarks.empty())
|
||||
{
|
||||
m_global_benchmark_args.push_back(prop_arg);
|
||||
m_global_benchmark_args.push_back(prop_val);
|
||||
return;
|
||||
}
|
||||
|
||||
benchmark_base &bench = *m_benchmarks.back();
|
||||
nvbench::criterion_params& criterion_params = bench.get_criterion_params();
|
||||
std::string name(prop_arg.begin() + 2, prop_arg.end());
|
||||
if (type == nvbench::named_values::type::float64)
|
||||
{
|
||||
nvbench::float64_t value{};
|
||||
::parse(prop_val, value);
|
||||
|
||||
if (prop_arg == "--max-noise")
|
||||
{ // Specified as percentage, stored as ratio:
|
||||
value /= 100.0;
|
||||
}
|
||||
criterion_params.set_float64(name, value);
|
||||
}
|
||||
else if (type == nvbench::named_values::type::int64)
|
||||
{
|
||||
nvbench::int64_t value{};
|
||||
::parse(prop_val, value);
|
||||
criterion_params.set_int64(name, value);
|
||||
}
|
||||
else if (type == nvbench::named_values::type::string)
|
||||
{
|
||||
criterion_params.set_string(name, prop_val);
|
||||
}
|
||||
else
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error, "Unrecognized property: `{}`", prop_arg);
|
||||
}
|
||||
}
|
||||
catch (std::exception& e)
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error,
|
||||
"Error handling option `{} {}`:\n{}",
|
||||
prop_arg,
|
||||
prop_val,
|
||||
e.what());
|
||||
}
|
||||
|
||||
void option_parser::update_float64_prop(const std::string &prop_arg, const std::string &prop_val)
|
||||
try
|
||||
{
|
||||
@@ -948,15 +1042,7 @@ try
|
||||
|
||||
nvbench::float64_t value{};
|
||||
::parse(prop_val, value);
|
||||
if (prop_arg == "--min-time")
|
||||
{
|
||||
bench.set_min_time(value);
|
||||
}
|
||||
else if (prop_arg == "--max-noise")
|
||||
{ // Specified as percentage, stored as ratio:
|
||||
bench.set_max_noise(value / 100.);
|
||||
}
|
||||
else if (prop_arg == "--skip-time")
|
||||
if (prop_arg == "--skip-time")
|
||||
{
|
||||
bench.set_skip_time(value);
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/printer_multiplex.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
|
||||
#include <iosfwd>
|
||||
#include <memory>
|
||||
@@ -86,6 +87,7 @@ private:
|
||||
void set_persistence_mode(const std::string &state);
|
||||
void lock_gpu_clocks(const std::string &rate);
|
||||
|
||||
void set_stopping_criterion(const std::string &criterion);
|
||||
void enable_run_once();
|
||||
void disable_blocking_kernel();
|
||||
|
||||
@@ -111,6 +113,10 @@ private:
|
||||
void update_int64_prop(const std::string &prop_arg, const std::string &prop_val);
|
||||
void update_float64_prop(const std::string &prop_arg, const std::string &prop_val);
|
||||
|
||||
void update_criterion_prop(const std::string &prop_arg,
|
||||
const std::string &prop_val,
|
||||
const nvbench::named_values::type type);
|
||||
|
||||
void update_used_device_state() const;
|
||||
|
||||
// Command line args
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <nvbench/named_values.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
@@ -122,6 +123,17 @@ struct state
|
||||
void set_min_samples(nvbench::int64_t min_samples) { m_min_samples = min_samples; }
|
||||
/// @}
|
||||
|
||||
[[nodiscard]] const nvbench::criterion_params &get_criterion_params() const
|
||||
{
|
||||
return m_criterion_params;
|
||||
}
|
||||
|
||||
/// Control the stopping criterion for the measurement loop.
|
||||
/// @{
|
||||
[[nodiscard]] const std::string& get_stopping_criterion() const { return m_stopping_criterion; }
|
||||
void set_stopping_criterion(std::string criterion) { m_stopping_criterion = std::move(criterion); }
|
||||
/// @}
|
||||
|
||||
/// If true, the benchmark is only run once, skipping all warmup runs and only
|
||||
/// executing a single non-batched measurement. This is intended for use with
|
||||
/// external profiling tools. @{
|
||||
@@ -135,16 +147,30 @@ struct state
|
||||
void set_disable_blocking_kernel(bool v) { m_disable_blocking_kernel = v; }
|
||||
/// @}
|
||||
|
||||
/// Accumulate at least this many seconds of timing data per measurement. @{
|
||||
[[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
|
||||
void set_min_time(nvbench::float64_t min_time) { m_min_time = min_time; }
|
||||
/// Accumulate at least this many seconds of timing data per measurement.
|
||||
/// Only applies to `stdrel` stopping criterion. @{
|
||||
[[nodiscard]] nvbench::float64_t get_min_time() const
|
||||
{
|
||||
return m_criterion_params.get_float64("min-time");
|
||||
}
|
||||
void set_min_time(nvbench::float64_t min_time)
|
||||
{
|
||||
m_criterion_params.set_float64("min-time", min_time);
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// Specify the maximum amount of noise if a measurement supports noise.
|
||||
/// Noise is the relative standard deviation:
|
||||
/// `noise = stdev / mean_time`. @{
|
||||
[[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; }
|
||||
void set_max_noise(nvbench::float64_t max_noise) { m_max_noise = max_noise; }
|
||||
/// `noise = stdev / mean_time`.
|
||||
/// Only applies to `stdrel` stopping criterion. @{
|
||||
[[nodiscard]] nvbench::float64_t get_max_noise() const
|
||||
{
|
||||
return m_criterion_params.get_float64("max-noise");
|
||||
}
|
||||
void set_max_noise(nvbench::float64_t max_noise)
|
||||
{
|
||||
m_criterion_params.set_float64("max-noise", max_noise);
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// If a warmup run finishes in less than `skip_time`, the measurement will
|
||||
@@ -270,9 +296,11 @@ private:
|
||||
bool m_run_once{false};
|
||||
bool m_disable_blocking_kernel{false};
|
||||
|
||||
|
||||
nvbench::criterion_params m_criterion_params;
|
||||
std::string m_stopping_criterion;
|
||||
|
||||
nvbench::int64_t m_min_samples;
|
||||
nvbench::float64_t m_min_time;
|
||||
nvbench::float64_t m_max_noise;
|
||||
|
||||
nvbench::float64_t m_skip_time;
|
||||
nvbench::float64_t m_timeout;
|
||||
|
||||
@@ -36,9 +36,9 @@ state::state(const benchmark_base &bench)
|
||||
: m_benchmark{bench}
|
||||
, m_run_once{bench.get_run_once()}
|
||||
, m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
|
||||
, m_criterion_params{bench.get_criterion_params()}
|
||||
, m_stopping_criterion(bench.get_stopping_criterion())
|
||||
, m_min_samples{bench.get_min_samples()}
|
||||
, m_min_time{bench.get_min_time()}
|
||||
, m_max_noise{bench.get_max_noise()}
|
||||
, m_skip_time{bench.get_skip_time()}
|
||||
, m_timeout{bench.get_timeout()}
|
||||
{}
|
||||
@@ -53,9 +53,9 @@ state::state(const benchmark_base &bench,
|
||||
, m_type_config_index{type_config_index}
|
||||
, m_run_once{bench.get_run_once()}
|
||||
, m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
|
||||
, m_criterion_params{bench.get_criterion_params()}
|
||||
, m_stopping_criterion(bench.get_stopping_criterion())
|
||||
, m_min_samples{bench.get_min_samples()}
|
||||
, m_min_time{bench.get_min_time()}
|
||||
, m_max_noise{bench.get_max_noise()}
|
||||
, m_skip_time{bench.get_skip_time()}
|
||||
, m_timeout{bench.get_timeout()}
|
||||
{}
|
||||
|
||||
136
nvbench/stopping_criterion.cuh
Normal file
136
nvbench/stopping_criterion.cuh
Normal file
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/named_values.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <initializer_list>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
constexpr nvbench::float64_t compat_min_time() { return 0.5; } // 0.5 seconds
|
||||
constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relative standard deviation
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/**
|
||||
* Stores all the parameters for stopping criterion in use
|
||||
*/
|
||||
class criterion_params
|
||||
{
|
||||
nvbench::named_values m_named_values;
|
||||
public:
|
||||
criterion_params();
|
||||
criterion_params(std::initializer_list<std::pair<std::string, nvbench::named_values::value_type>>);
|
||||
|
||||
/**
|
||||
* Set parameter values from another criterion_params object if they exist
|
||||
*
|
||||
* Parameters in `other` that do not correspond to parameters in `this` are ignored.
|
||||
*/
|
||||
void set_from(const criterion_params &other);
|
||||
|
||||
void set_int64(std::string name, nvbench::int64_t value);
|
||||
void set_float64(std::string name, nvbench::float64_t value);
|
||||
void set_string(std::string name, std::string value);
|
||||
|
||||
[[nodiscard]] std::vector<std::string> get_names() const;
|
||||
[[nodiscard]] nvbench::named_values::type get_type(const std::string &name) const;
|
||||
|
||||
[[nodiscard]] bool has_value(const std::string &name) const;
|
||||
[[nodiscard]] nvbench::int64_t get_int64(const std::string &name) const;
|
||||
[[nodiscard]] nvbench::float64_t get_float64(const std::string &name) const;
|
||||
[[nodiscard]] std::string get_string(const std::string &name) const;
|
||||
};
|
||||
|
||||
/**
|
||||
* Stopping criterion interface
|
||||
*/
|
||||
class stopping_criterion_base
|
||||
{
|
||||
protected:
|
||||
std::string m_name;
|
||||
criterion_params m_params;
|
||||
|
||||
public:
|
||||
/**
|
||||
* @param name Unique name of the criterion
|
||||
* @param params Default values for all parameters of the criterion
|
||||
*/
|
||||
explicit stopping_criterion_base(std::string name, criterion_params params)
|
||||
: m_name{std::move(name)}
|
||||
, m_params{std::move(params)}
|
||||
{}
|
||||
|
||||
[[nodiscard]] const std::string &get_name() const { return m_name; }
|
||||
[[nodiscard]] const criterion_params &get_params() const { return m_params; }
|
||||
|
||||
/**
|
||||
* Initialize the criterion with the given parameters
|
||||
*
|
||||
* This method is called once per benchmark run, before any measurements are provided.
|
||||
*/
|
||||
void initialize(const criterion_params ¶ms)
|
||||
{
|
||||
m_params.set_from(params);
|
||||
this->do_initialize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the latest measurement to the criterion
|
||||
*/
|
||||
void add_measurement(nvbench::float64_t measurement)
|
||||
{
|
||||
this->do_add_measurement(measurement);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the criterion has been met for all measurements processed by `add_measurement`
|
||||
*/
|
||||
bool is_finished()
|
||||
{
|
||||
return this->do_is_finished();
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Initialize the criterion after updaring the parameters
|
||||
*/
|
||||
virtual void do_initialize() = 0;
|
||||
|
||||
/**
|
||||
* Add the latest measurement to the criterion
|
||||
*/
|
||||
virtual void do_add_measurement(nvbench::float64_t measurement) = 0;
|
||||
|
||||
/**
|
||||
* Check if the criterion has been met for all measurements processed by `add_measurement`
|
||||
*/
|
||||
virtual bool do_is_finished() = 0;
|
||||
};
|
||||
|
||||
} // namespace nvbench
|
||||
124
nvbench/stopping_criterion.cxx
Normal file
124
nvbench/stopping_criterion.cxx
Normal file
@@ -0,0 +1,124 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
// Default constructor for compatibility with old code
|
||||
criterion_params::criterion_params()
|
||||
: criterion_params{{"max-noise", nvbench::detail::compat_max_noise()},
|
||||
{"min-time", nvbench::detail::compat_min_time()}}
|
||||
{}
|
||||
|
||||
// Builds a parameter set from explicit (name, value) pairs, storing each pair
// in the order it appears in `list`.
criterion_params::criterion_params(
  std::initializer_list<std::pair<std::string, nvbench::named_values::value_type>> list)
{
  for (const auto &entry : list)
  {
    m_named_values.set_value(entry.first, entry.second);
  }
}
|
||||
|
||||
void criterion_params::set_from(const criterion_params &other)
|
||||
{
|
||||
for (const std::string &name : this->get_names())
|
||||
{
|
||||
if (other.has_value(name))
|
||||
{
|
||||
if (this->get_type(name) != other.get_type(name))
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error,
|
||||
"Mismatched types for named value \"{}\". "
|
||||
"Expected {}, got {}.",
|
||||
name,
|
||||
static_cast<int>(this->get_type(name)),
|
||||
static_cast<int>(other.get_type(name)));
|
||||
}
|
||||
m_named_values.remove_value(name);
|
||||
m_named_values.set_value(name, other.m_named_values.get_value(name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void criterion_params::set_int64(std::string name, nvbench::int64_t value)
|
||||
{
|
||||
if (m_named_values.has_value(name))
|
||||
{
|
||||
m_named_values.remove_value(name);
|
||||
}
|
||||
|
||||
m_named_values.set_int64(name, value);
|
||||
}
|
||||
|
||||
void criterion_params::set_float64(std::string name, nvbench::float64_t value)
|
||||
{
|
||||
if (m_named_values.has_value(name))
|
||||
{
|
||||
m_named_values.remove_value(name);
|
||||
}
|
||||
|
||||
m_named_values.set_float64(name, value);
|
||||
}
|
||||
|
||||
void criterion_params::set_string(std::string name, std::string value)
|
||||
{
|
||||
if (m_named_values.has_value(name))
|
||||
{
|
||||
m_named_values.remove_value(name);
|
||||
}
|
||||
|
||||
m_named_values.set_string(name, std::move(value));
|
||||
}
|
||||
|
||||
bool criterion_params::has_value(const std::string &name) const
|
||||
{
|
||||
return m_named_values.has_value(name);
|
||||
}
|
||||
|
||||
// Returns the int64 value stored under `name`, as reported by the underlying
// named_values container.
nvbench::int64_t criterion_params::get_int64(const std::string &name) const
{
  return this->m_named_values.get_int64(name);
}
|
||||
|
||||
// Returns the float64 value stored under `name`, as reported by the underlying
// named_values container.
nvbench::float64_t criterion_params::get_float64(const std::string &name) const
{
  return this->m_named_values.get_float64(name);
}
|
||||
|
||||
std::string criterion_params::get_string(const std::string &name) const
|
||||
{
|
||||
return m_named_values.get_string(name);
|
||||
}
|
||||
|
||||
std::vector<std::string> criterion_params::get_names() const
|
||||
{
|
||||
return m_named_values.get_names();
|
||||
}
|
||||
|
||||
// Returns the type tag of the value stored under `name`, as reported by the
// underlying named_values container.
nvbench::named_values::type criterion_params::get_type(const std::string &name) const
{
  return this->m_named_values.get_type(name);
}
|
||||
|
||||
|
||||
} // namespace nvbench
|
||||
@@ -4,7 +4,10 @@ set(test_srcs
|
||||
create.cu
|
||||
cuda_timer.cu
|
||||
cpu_timer.cu
|
||||
criterion_manager.cu
|
||||
criterion_params.cu
|
||||
enum_type_list.cu
|
||||
entropy_criterion.cu
|
||||
float64_axis.cu
|
||||
int64_axis.cu
|
||||
named_values.cu
|
||||
@@ -13,7 +16,9 @@ set(test_srcs
|
||||
ring_buffer.cu
|
||||
runner.cu
|
||||
state.cu
|
||||
statistics.cu
|
||||
state_generator.cu
|
||||
stdrel_criterion.cu
|
||||
string_axis.cu
|
||||
type_axis.cu
|
||||
type_list.cu
|
||||
|
||||
76
testing/criterion_manager.cu
Normal file
76
testing/criterion_manager.cu
Normal file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/criterion_manager.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
void test_standard_criteria_exist()
|
||||
{
|
||||
ASSERT(nvbench::criterion_manager::get().get_criterion("stdrel").get_name() == "stdrel");
|
||||
ASSERT(nvbench::criterion_manager::get().get_criterion("entropy").get_name() == "entropy");
|
||||
}
|
||||
|
||||
class custom_criterion : public nvbench::stopping_criterion_base
|
||||
{
|
||||
public:
|
||||
custom_criterion()
|
||||
: nvbench::stopping_criterion_base("custom", nvbench::criterion_params{})
|
||||
{}
|
||||
|
||||
protected:
|
||||
virtual void do_initialize() override {}
|
||||
virtual void do_add_measurement(nvbench::float64_t /* measurement */) override {}
|
||||
virtual bool do_is_finished() override { return true; }
|
||||
};
|
||||
|
||||
void test_no_duplicates_are_allowed()
|
||||
{
|
||||
nvbench::criterion_manager& manager = nvbench::criterion_manager::get();
|
||||
bool exception_triggered = false;
|
||||
|
||||
try {
|
||||
nvbench::stopping_criterion_base& custom = manager.get_criterion("custom");
|
||||
} catch(...) {
|
||||
exception_triggered = true;
|
||||
}
|
||||
ASSERT(exception_triggered);
|
||||
|
||||
std::unique_ptr<custom_criterion> custom_ptr = std::make_unique<custom_criterion>();
|
||||
custom_criterion* custom_raw = custom_ptr.get();
|
||||
ASSERT(&manager.add(std::move(custom_ptr)) == custom_raw);
|
||||
|
||||
nvbench::stopping_criterion_base& custom = nvbench::criterion_manager::get().get_criterion("custom");
|
||||
ASSERT(custom_raw == &custom);
|
||||
|
||||
exception_triggered = false;
|
||||
try {
|
||||
manager.add(std::make_unique<custom_criterion>());
|
||||
} catch(...) {
|
||||
exception_triggered = true;
|
||||
}
|
||||
ASSERT(exception_triggered);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test_standard_criteria_exist();
|
||||
test_no_duplicates_are_allowed();
|
||||
}
|
||||
|
||||
63
testing/criterion_params.cu
Normal file
63
testing/criterion_params.cu
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/criterion_manager.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
void test_compat_parameters()
|
||||
{
|
||||
nvbench::criterion_params params;
|
||||
|
||||
ASSERT(params.has_value("max-noise"));
|
||||
ASSERT(params.has_value("min-time"));
|
||||
|
||||
ASSERT(params.get_float64("max-noise") == nvbench::detail::compat_max_noise());
|
||||
ASSERT(params.get_float64("min-time") == nvbench::detail::compat_min_time());
|
||||
}
|
||||
|
||||
void test_compat_overwrite()
|
||||
{
|
||||
nvbench::criterion_params params;
|
||||
params.set_float64("max-noise", 40000.0);
|
||||
params.set_float64("min-time", 42000.0);
|
||||
|
||||
ASSERT(params.get_float64("max-noise") == 40000.0);
|
||||
ASSERT(params.get_float64("min-time") == 42000.0);
|
||||
}
|
||||
|
||||
void test_overwrite()
|
||||
{
|
||||
nvbench::criterion_params params;
|
||||
ASSERT(!params.has_value("custom"));
|
||||
|
||||
params.set_float64("custom", 42.0);
|
||||
ASSERT(params.get_float64("custom") == 42.0);
|
||||
|
||||
params.set_float64("custom", 4.2);
|
||||
ASSERT(params.get_float64("custom") == 4.2);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test_compat_parameters();
|
||||
test_compat_overwrite();
|
||||
test_overwrite();
|
||||
}
|
||||
|
||||
91
testing/entropy_criterion.cu
Normal file
91
testing/entropy_criterion.cu
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/detail/entropy_criterion.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <numeric>
|
||||
|
||||
void test_const()
|
||||
{
|
||||
nvbench::criterion_params params;
|
||||
nvbench::detail::entropy_criterion criterion;
|
||||
|
||||
criterion.initialize(params);
|
||||
for (int i = 0; i < 6; i++)
|
||||
{ // nvbench wants at least 5 to compute the standard deviation
|
||||
criterion.add_measurement(42.0);
|
||||
}
|
||||
ASSERT(criterion.is_finished());
|
||||
}
|
||||
|
||||
// Feeds `criterion` 50 measurements: 0, 1, 2, 3, 4, 5 and then 5 repeated for
// the remaining samples.
void produce_entropy_arch(nvbench::detail::entropy_criterion &criterion)
{
  /*
   * This pattern is designed to simulate the entropy:
   *
   *   0.0, 1.0, 1.5, 2.0, 2.3, 2.5 <---- no unexpected measurement after this point
   *   2.5, 2.4, 2.2, 2.1, 2.0, 1.9 <-+
   *   1.8, 1.7, 1.6, 1.6, 1.5, 1.4   |
   *   1.4, 1.3, 1.3, 1.3, 1.2, 1.2   |
   *   1.1, 1.1, 1.1, 1.0, 1.0, 1.0   +-- entropy only decreases after 5-th sample,
   *   1.0, 0.9, 0.9, 0.9, 0.9, 0.9   |   so the slope should be negative
   *   0.8, 0.8, 0.8, 0.8, 0.8, 0.8   |
   *   0.7, 0.7, 0.7, 0.7, 0.7, 0.7 <-+
   */
  for (int step = 0; step < 50; ++step)
  {
    // Ramp up to 5.0, then saturate.
    const nvbench::float64_t x = static_cast<nvbench::float64_t>(step);
    criterion.add_measurement(x > 5.0 ? 5.0 : x);
  }
}
|
||||
|
||||
void test_entropy_arch()
|
||||
{
|
||||
nvbench::detail::entropy_criterion criterion;
|
||||
|
||||
// The R2 should be around 0.5
|
||||
// The angle should be around -1.83
|
||||
nvbench::criterion_params params;
|
||||
params.set_float64("min-r2", 0.3);
|
||||
params.set_float64("max-angle", -1.0);
|
||||
criterion.initialize(params);
|
||||
produce_entropy_arch(criterion);
|
||||
ASSERT(criterion.is_finished());
|
||||
|
||||
params.set_float64("min-r2", 0.7);
|
||||
criterion.initialize(params);
|
||||
produce_entropy_arch(criterion);
|
||||
ASSERT(!criterion.is_finished());
|
||||
|
||||
params.set_float64("min-r2", 0.3);
|
||||
params.set_float64("max-angle", -2.0);
|
||||
criterion.initialize(params);
|
||||
produce_entropy_arch(criterion);
|
||||
ASSERT(!criterion.is_finished());
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test_const();
|
||||
test_entropy_arch();
|
||||
}
|
||||
@@ -1229,6 +1229,27 @@ void test_timeout()
|
||||
ASSERT(std::abs(states[0].get_timeout() - 12345e2) < 1.);
|
||||
}
|
||||
|
||||
void test_stopping_criterion()
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "DummyBench",
|
||||
"--stopping-criterion", "entropy",
|
||||
"--max-angle", "0.42",
|
||||
"--min-r2", "0.6"});
|
||||
const auto& states = parser_to_states(parser);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(states[0].get_stopping_criterion() == "entropy");
|
||||
|
||||
const nvbench::criterion_params &criterion_params = states[0].get_criterion_params();
|
||||
ASSERT(criterion_params.has_value("max-angle"));
|
||||
ASSERT(criterion_params.has_value("min-r2"));
|
||||
|
||||
ASSERT(criterion_params.get_float64("max-angle") == 0.42);
|
||||
ASSERT(criterion_params.get_float64("min-r2") == 0.6);
|
||||
}
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
@@ -1265,6 +1286,8 @@ try
|
||||
test_skip_time();
|
||||
test_timeout();
|
||||
|
||||
test_stopping_criterion();
|
||||
|
||||
return 0;
|
||||
}
|
||||
catch (std::exception &err)
|
||||
|
||||
@@ -27,7 +27,7 @@ template <typename T>
|
||||
bool equal(const nvbench::detail::ring_buffer<T> &buffer,
|
||||
const std::vector<T> &reference)
|
||||
{
|
||||
return std::equal(buffer.cbegin(), buffer.cend(), reference.cbegin());
|
||||
return std::equal(buffer.begin(), buffer.end(), reference.begin());
|
||||
}
|
||||
|
||||
int main()
|
||||
@@ -62,12 +62,12 @@ try
|
||||
ASSERT(avg.size() == 3);
|
||||
ASSERT(avg.capacity() == 3);
|
||||
ASSERT_MSG(avg.back() == 5, " (got {})", avg.back());
|
||||
ASSERT(equal(avg, {5, 2, -15}));
|
||||
ASSERT(equal(avg, {2, -15, 5}));
|
||||
|
||||
avg.push_back(0);
|
||||
ASSERT(avg.size() == 3);
|
||||
ASSERT(avg.capacity() == 3);
|
||||
ASSERT(equal(avg, {5, 0, -15}));
|
||||
ASSERT(equal(avg, {-15, 5, 0}));
|
||||
ASSERT_MSG(avg.back() == 0, " (got {})", avg.back());
|
||||
|
||||
avg.push_back(128);
|
||||
|
||||
128
testing/statistics.cu
Normal file
128
testing/statistics.cu
Normal file
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/detail/statistics.cuh>
#include <nvbench/types.cuh>

#include "test_asserts.cuh"

#include <algorithm>
#include <cmath>
#include <functional>
#include <vector>
|
||||
|
||||
namespace statistics = nvbench::detail::statistics;
|
||||
|
||||
void test_mean()
|
||||
{
|
||||
{
|
||||
std::vector<nvbench::float64_t> data{1.0, 2.0, 3.0, 4.0, 5.0};
|
||||
const nvbench::float64_t actual = statistics::compute_mean(std::begin(data), std::end(data));
|
||||
const nvbench::float64_t expected = 3.0;
|
||||
ASSERT(std::abs(actual - expected) < 0.001);
|
||||
}
|
||||
|
||||
{
|
||||
std::vector<nvbench::float64_t> data;
|
||||
const bool finite = std::isfinite(statistics::compute_mean(std::begin(data), std::end(data)));
|
||||
ASSERT(!finite);
|
||||
}
|
||||
}
|
||||
|
||||
void test_std()
|
||||
{
|
||||
std::vector<nvbench::float64_t> data{1.0, 2.0, 3.0, 4.0, 5.0};
|
||||
const nvbench::float64_t mean = 3.0;
|
||||
const nvbench::float64_t actual = statistics::standard_deviation(std::begin(data), std::end(data), mean);
|
||||
const nvbench::float64_t expected = 1.581;
|
||||
ASSERT(std::abs(actual - expected) < 0.001);
|
||||
}
|
||||
|
||||
void test_lin_regression()
|
||||
{
|
||||
{
|
||||
std::vector<nvbench::float64_t> ys{1.0, 2.0, 3.0, 4.0, 5.0};
|
||||
auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
|
||||
ASSERT(slope == 1.0);
|
||||
ASSERT(intercept == 1.0);
|
||||
}
|
||||
{
|
||||
std::vector<nvbench::float64_t> ys{42.0, 42.0, 42.0};
|
||||
auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
|
||||
ASSERT(slope == 0.0);
|
||||
ASSERT(intercept == 42.0);
|
||||
}
|
||||
{
|
||||
std::vector<nvbench::float64_t> ys{8.0, 4.0, 0.0};
|
||||
auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
|
||||
ASSERT(slope == -4.0);
|
||||
ASSERT(intercept == 8.0);
|
||||
}
|
||||
}
|
||||
|
||||
void test_r2()
|
||||
{
|
||||
{
|
||||
std::vector<nvbench::float64_t> ys{1.0, 2.0, 3.0, 4.0, 5.0};
|
||||
auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
|
||||
const nvbench::float64_t actual = statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept);
|
||||
const nvbench::float64_t expected = 1.0;
|
||||
ASSERT(std::abs(actual - expected) < 0.001);
|
||||
}
|
||||
{
|
||||
std::vector<nvbench::float64_t> signal{1.0, 2.0, 3.0, 4.0, 5.0};
|
||||
std::vector<nvbench::float64_t> noise{-1.0, 1.0, -1.0, 1.0, -1.0};
|
||||
std::vector<nvbench::float64_t> ys(signal.size());
|
||||
|
||||
std::transform(std::begin(signal),
|
||||
std::end(signal),
|
||||
std::begin(noise),
|
||||
std::begin(ys),
|
||||
std::plus<nvbench::float64_t>());
|
||||
|
||||
auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
|
||||
const nvbench::float64_t expected = 0.675;
|
||||
const nvbench::float64_t actual = statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept);
|
||||
ASSERT(std::abs(actual - expected) < 0.001);
|
||||
}
|
||||
}
|
||||
|
||||
void test_slope_conversion()
|
||||
{
|
||||
{
|
||||
const nvbench::float64_t actual = statistics::slope2deg(0.0);
|
||||
const nvbench::float64_t expected = 0.0;
|
||||
ASSERT(std::abs(actual - expected) < 0.001);
|
||||
}
|
||||
{
|
||||
const nvbench::float64_t actual = statistics::slope2deg(1.0);
|
||||
const nvbench::float64_t expected = 45.0;
|
||||
ASSERT(std::abs(actual - expected) < 0.001);
|
||||
}
|
||||
{
|
||||
const nvbench::float64_t actual = statistics::slope2deg(5.0);
|
||||
const nvbench::float64_t expected = 78.69;
|
||||
ASSERT(std::abs(actual - expected) < 0.001);
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test_mean();
|
||||
test_std();
|
||||
test_lin_regression();
|
||||
test_r2();
|
||||
test_slope_conversion();
|
||||
}
|
||||
84
testing/stdrel_criterion.cu
Normal file
84
testing/stdrel_criterion.cu
Normal file
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/detail/stdrel_criterion.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <numeric>
|
||||
|
||||
void test_const()
|
||||
{
|
||||
nvbench::criterion_params params;
|
||||
nvbench::detail::stdrel_criterion criterion;
|
||||
|
||||
criterion.initialize(params);
|
||||
for (int i = 0; i < 5; i++)
|
||||
{ // nvbench wants at least 5 to compute the standard deviation
|
||||
criterion.add_measurement(42.0);
|
||||
}
|
||||
ASSERT(criterion.is_finished());
|
||||
}
|
||||
|
||||
// Produces `size` samples drawn from a normal distribution with the given
// `mean` and a standard deviation of `mean * rel_std_dev`.
//
// The generator is seeded from a counter instead of std::random_device so that
// every test run sees the same sample sequences (reproducible failures, no
// run-to-run flakiness). The counter advances on each call, so successive
// calls still draw from different streams.
//
// `double` is used throughout to match the declared signature.
std::vector<double> generate(double mean, double rel_std_dev, int size)
{
  static std::mt19937::result_type seed = 0;
  std::mt19937 gen(seed++);

  std::vector<double> v(static_cast<std::size_t>(size));
  std::normal_distribution<double> dist(mean, mean * rel_std_dev);

  // Plain loop: avoids relying on <algorithm>, which this file does not include.
  for (double &sample : v)
  {
    sample = dist(gen);
  }
  return v;
}
|
||||
|
||||
void test_stdrel()
|
||||
{
|
||||
const nvbench::int64_t size = 10;
|
||||
const nvbench::float64_t mean = 42.0;
|
||||
const nvbench::float64_t max_noise = 0.1;
|
||||
|
||||
nvbench::criterion_params params;
|
||||
params.set_float64("max-noise", max_noise);
|
||||
|
||||
nvbench::detail::stdrel_criterion criterion;
|
||||
criterion.initialize(params);
|
||||
|
||||
for (nvbench::float64_t measurement: generate(mean, max_noise / 2, size))
|
||||
{
|
||||
criterion.add_measurement(measurement);
|
||||
}
|
||||
ASSERT(criterion.is_finished());
|
||||
|
||||
params.set_float64("max-noise", max_noise);
|
||||
criterion.initialize(params);
|
||||
|
||||
for (nvbench::float64_t measurement: generate(mean, max_noise * 2, size))
|
||||
{
|
||||
criterion.add_measurement(measurement);
|
||||
}
|
||||
ASSERT(!criterion.is_finished());
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test_const();
|
||||
test_stdrel();
|
||||
}
|
||||
Reference in New Issue
Block a user