Implement new convergence check for noisy kernels.

Previously, convergence was tested by waiting for the relative stdev
of CUDA timings ("noise") to drop below a certain percentage
(`max_noise`).

This assumed that all benchmarks would eventually see their noise drop
to some threshold, but this is not the case. In practice, many benchmarks
never converge to the default 0.5% relative stdev and instead will always
run to the 15s timeout -- even if the means have converged in a second
or two.

Added a new check that detects when the noise itself has stabilized and
ends the benchmark at that point, even if noise > max_noise.

After testing, this patch alone significantly reduces the runtime of the
Thrust+CUB benchmark suite (from 30 hours to 5 hours) and produces similar
timing results.

The parameters used to tune this feature are not exposed -- if this
approach works long-term and there's a strong motivation to let users
tweak them, then we can worry about names/APIs/CLI/docs later.
This commit is contained in:
Allison Vacanti
2021-12-21 21:16:17 -05:00
parent 8e56a7bd94
commit 178dd0eb68
6 changed files with 382 additions and 87 deletions

View File

@@ -10,6 +10,7 @@ set(test_srcs
named_values.cu
option_parser.cu
range.cu
ring_buffer.cu
runner.cu
state.cu
state_generator.cu
@@ -36,3 +37,4 @@ foreach(test_src IN LISTS test_srcs)
endforeach()
add_subdirectory(cmake)
add_subdirectory(device)

90
testing/ring_buffer.cu Normal file
View File

@@ -0,0 +1,90 @@
/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/detail/ring_buffer.cuh>
#include "test_asserts.cuh"
#include <algorithm>
#include <vector>
// Test helper: returns true when every element in the buffer's
// [cbegin(), cend()) range matches the element at the same position in
// `reference`.
//
// Only the buffer's iterator range drives the comparison; the length of
// `reference` is never checked, so callers must pass a reference that is at
// least as long as that range.
template <typename T>
bool equal(const nvbench::detail::ring_buffer<T> &buffer,
           const std::vector<T> &reference)
{
  const auto buffer_end = buffer.cend();
  const auto first_diff =
    std::mismatch(buffer.cbegin(), buffer_end, reference.cbegin());
  // No mismatch was found if the scan consumed the whole buffer range.
  return first_diff.first == buffer_end;
}
// Exercises nvbench::detail::ring_buffer<int> with a capacity of 3:
// construction, filling up to capacity, overwriting the oldest slots once
// full, and clear().
//
// Returns 0 when the body runs to completion. If a std::exception escapes
// the body, its message is printed to stderr and 1 is returned.
// NOTE(review): ASSERT / ASSERT_MSG come from test_asserts.cuh (not shown
// here); presumably they report failures by throwing -- confirm.
int main()
try
{
  nvbench::detail::ring_buffer<int> avg(3);

  // Freshly constructed: empty, but capacity is already fixed.
  ASSERT(avg.capacity() == 3);
  ASSERT(avg.size() == 0);
  ASSERT(avg.empty());
  ASSERT(equal(avg, {0, 0, 0}));

  // First three pushes land in slots 0, 1, 2 in order.
  avg.push_back(32);
  ASSERT(!avg.empty());
  ASSERT(avg.size() == 1);
  ASSERT(avg.capacity() == 3);
  ASSERT_MSG(avg.back() == 32, " (got {})", avg.back());
  ASSERT(equal(avg, {32, 0, 0}));

  avg.push_back(2);
  ASSERT(avg.size() == 2);
  ASSERT(avg.capacity() == 3);
  ASSERT_MSG(avg.back() == 2, " (got {})", avg.back());
  ASSERT(equal(avg, {32, 2, 0}));

  avg.push_back(-15);
  ASSERT(avg.size() == 3);
  ASSERT(avg.capacity() == 3);
  ASSERT_MSG(avg.back() == -15, " (got {})", avg.back());
  ASSERT(equal(avg, {32, 2, -15}));

  // Buffer is full: further pushes keep size at 3 and overwrite slots
  // 0, 1, 2 in turn, while back() tracks the most recently pushed value.
  avg.push_back(5);
  ASSERT(avg.size() == 3);
  ASSERT(avg.capacity() == 3);
  ASSERT_MSG(avg.back() == 5, " (got {})", avg.back());
  ASSERT(equal(avg, {5, 2, -15}));

  avg.push_back(0);
  ASSERT(avg.size() == 3);
  ASSERT(avg.capacity() == 3);
  ASSERT_MSG(avg.back() == 0, " (got {})", avg.back());
  ASSERT(equal(avg, {5, 0, -15}));

  avg.push_back(128);
  ASSERT(avg.size() == 3);
  ASSERT(avg.capacity() == 3);
  ASSERT_MSG(avg.back() == 128, " (got {})", avg.back());
  ASSERT(equal(avg, {5, 0, 128}));

  // clear() empties the buffer but leaves the capacity untouched.
  avg.clear();
  ASSERT(avg.empty());
  ASSERT(avg.size() == 0);
  ASSERT(avg.capacity() == 3);

  return 0;
}
catch (const std::exception &err) // read-only use: catch by const reference
{
  fmt::print(stderr, "{}", err.what());
  return 1;
}