mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-06-29 18:57:44 +00:00
* Reduce stdrel criterion complexity and ensure termination
Replace the stdrel criterion's growing sample history with an online
mean/variance accumulator. This keeps the stopping criterion based on
relative standard deviation, preserves the unbiased standard-deviation
estimate used for convergence, and reduces per-sample update work from
recomputing over the full history to constant time.
Add a bounded invalid-noise path so measurements that persistently produce
non-finite relative noise, such as all-zero timings, can terminate without
waiting for the wall-time timeout. Keep the normal min-time gate for ordinary
stdrel convergence.
Add focused tests for the online accumulator, stdrel sample-count threshold,
sample-standard-deviation behavior, deterministic convergence inputs, and
persistent invalid-noise termination. Update the CLI help for the stdrel
termination behavior.
* change max-noise to for consistency
* Use online_mean_variance on m_noise_tracker in is_finished()
Previously, standard deviation call was made using current
noise level instead of mean noise level. Because of identity
E[ (N - C)^2 ] =
E[ (N - E[N])^2 ] + (E[N] - C)^2 >= E[ (N - E[N])^2 ]
this led to the criterion terminating later than it could have, because
the estimated expectation is always greater than or equal to the
estimate relative to the mean.
Code used the current noise level instead of the mean to avoid needing to
make two passes through the m_noise_tracker container.
Use of online_mean_variance improves the accuracy of estimating the
dispersion of the noise signal while maintaining a single pass through the
container.
* Address review feedback
Fixed misleading commit. Introduce private methods to refactor
computation of repeated expressions.
Renamed m_cuda_times_summary to m_measurements_summary, since
criterion can be applied for CPU-only measurements too.
Introduced is_close utility for checking whether two floating
point numbers are close to one another.
Introduced descriptive constexpr variables for hard-wired
constants
176 lines
5.3 KiB
Plaintext
176 lines
5.3 KiB
Plaintext
/*
|
|
* Copyright 2023 NVIDIA Corporation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
|
* (the "License"); you may not use this file except in compliance with
|
|
* the License.
|
|
*
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <nvbench/detail/statistics.cuh>
|
|
#include <nvbench/detail/stdrel_criterion.cuh>
|
|
#include <nvbench/stopping_criterion.cuh>
|
|
#include <nvbench/types.cuh>
|
|
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
#include <limits>
|
|
#include <vector>
|
|
|
|
#include "test_asserts.cuh"
|
|
|
|
// Upper bound on how many measurements count_invalid_measurements_until_finished
// feeds to a criterion, so that helper's loop always terminates.
constexpr nvbench::int64_t max_invalid_measurements_cap{1024};
|
|
|
|
// Feeds zero-valued measurements to a freshly initialized stdrel criterion
// configured with the given "min-time" until the criterion reports that it is
// finished or max_invalid_measurements_cap samples have been fed.
//
// Asserts that the criterion is not finished right after initialization and
// that it is finished once feeding stops.
//
// Returns the number of measurements that were fed.
nvbench::int64_t count_invalid_measurements_until_finished(nvbench::float64_t min_time)
{
  nvbench::criterion_params params;
  params.set_float64("min-time", min_time);

  nvbench::detail::stdrel_criterion criterion;
  criterion.initialize(params);
  // freshly initialized criterion starts as not is_finished
  ASSERT(!criterion.is_finished());

  const nvbench::float64_t zero_timing{0};
  nvbench::int64_t fed = 0;
  for (;;)
  {
    // Query the criterion before checking the cap, as a short-circuit `&&` would.
    if (criterion.is_finished() || fed >= max_invalid_measurements_cap)
    {
      break;
    }
    criterion.add_measurement(zero_timing);
    ++fed;
  }

  // Hitting the cap without finishing is a test failure.
  ASSERT(criterion.is_finished());
  return fed;
}
|
|
|
|
void test_const()
|
|
{
|
|
nvbench::criterion_params params;
|
|
nvbench::detail::stdrel_criterion criterion;
|
|
using nvbench::detail::statistics::min_samples_for_noise_estimate;
|
|
|
|
criterion.initialize(params);
|
|
for (nvbench::int64_t i = 0; i < min_samples_for_noise_estimate; ++i)
|
|
{
|
|
criterion.add_measurement(42.0);
|
|
}
|
|
ASSERT(criterion.is_finished());
|
|
}
|
|
|
|
void test_stdrel()
|
|
{
|
|
const nvbench::float64_t max_noise = 0.1;
|
|
|
|
nvbench::criterion_params params;
|
|
params.set_float64("max-noise", max_noise);
|
|
params.set_float64("min-time", 0.0);
|
|
|
|
nvbench::detail::stdrel_criterion criterion;
|
|
criterion.initialize(params);
|
|
|
|
using nvbench::detail::statistics::min_samples_for_noise_estimate;
|
|
|
|
std::vector<nvbench::float64_t> low_noise(min_samples_for_noise_estimate, 100.0);
|
|
low_noise.back() = 101.0;
|
|
for (nvbench::float64_t measurement : low_noise)
|
|
{
|
|
criterion.add_measurement(measurement);
|
|
}
|
|
ASSERT(criterion.is_finished());
|
|
|
|
params.set_float64("max-noise", max_noise);
|
|
criterion.initialize(params);
|
|
|
|
std::vector<nvbench::float64_t> high_noise;
|
|
high_noise.reserve(min_samples_for_noise_estimate);
|
|
for (nvbench::int64_t i = 0; i < min_samples_for_noise_estimate; ++i)
|
|
{
|
|
high_noise.push_back(static_cast<nvbench::float64_t>(i + 1) * 10.0);
|
|
}
|
|
for (nvbench::float64_t measurement : high_noise)
|
|
{
|
|
criterion.add_measurement(measurement);
|
|
}
|
|
ASSERT(!criterion.is_finished());
|
|
}
|
|
|
|
void test_stdrel_needs_enough_samples()
|
|
{
|
|
nvbench::criterion_params params;
|
|
params.set_float64("min-time", 0.0);
|
|
|
|
nvbench::detail::stdrel_criterion criterion;
|
|
criterion.initialize(params);
|
|
|
|
using nvbench::detail::statistics::min_samples_for_noise_estimate;
|
|
for (nvbench::int64_t i = 1; i < min_samples_for_noise_estimate; ++i)
|
|
{
|
|
criterion.add_measurement(42.0);
|
|
}
|
|
ASSERT(!criterion.is_finished());
|
|
}
|
|
|
|
void test_stdrel_uses_sample_standard_deviation()
|
|
{
|
|
using nvbench::detail::statistics::min_samples_for_noise_estimate;
|
|
const nvbench::int64_t n = std::max(nvbench::int64_t{26}, min_samples_for_noise_estimate);
|
|
const nvbench::float64_t a = 6;
|
|
const nvbench::float64_t b = 0;
|
|
// for sequence t = a * i + b, 1 <= i <= n
|
|
// mean = a*(n+1)/2 + b
|
|
// variance = a^2/12 * (n^2 - 1)
|
|
// for a, b, n = 6, 0, 26, mean = 81,
|
|
// biased standard deviation = 45 (noise 0.5556)
|
|
// unbiased standard deviation = 45.8912 (noise 0.5666)
|
|
|
|
const nvbench::float64_t biased_noise = std::sqrt(static_cast<nvbench::float64_t>(n - 1) /
|
|
static_cast<nvbench::float64_t>(3 * (n + 1)));
|
|
const nvbench::float64_t unbiased_noise =
|
|
std::sqrt(static_cast<nvbench::float64_t>(n) / static_cast<nvbench::float64_t>(3 * (n + 1)));
|
|
|
|
nvbench::criterion_params params;
|
|
params.set_float64("max-noise", 0.5 * (biased_noise + unbiased_noise));
|
|
params.set_float64("min-time", 0.0);
|
|
|
|
nvbench::detail::stdrel_criterion criterion;
|
|
criterion.initialize(params);
|
|
|
|
for (int i = 1; i <= n; ++i)
|
|
{
|
|
const nvbench::float64_t measurement = a * static_cast<nvbench::float64_t>(i) + b;
|
|
criterion.add_measurement(measurement);
|
|
}
|
|
|
|
ASSERT(!criterion.is_finished());
|
|
}
|
|
|
|
void test_stdrel_finishes_with_persistently_invalid_noise()
|
|
{
|
|
const auto count = count_invalid_measurements_until_finished(0.0);
|
|
ASSERT(count > 1);
|
|
}
|
|
|
|
void test_stdrel_invalid_noise_bypasses_min_time()
|
|
{
|
|
const auto count = count_invalid_measurements_until_finished(1.0);
|
|
ASSERT(count > 0);
|
|
}
|
|
|
|
int main()
|
|
{
|
|
test_const();
|
|
test_stdrel();
|
|
test_stdrel_needs_enough_samples();
|
|
test_stdrel_uses_sample_standard_deviation();
|
|
test_stdrel_finishes_with_persistently_invalid_noise();
|
|
test_stdrel_invalid_noise_bypasses_min_time();
|
|
}
|