Merge pull request #64 from allisonvacanti/noise_convergence

New convergence check
This commit is contained in:
Allison Vacanti
2021-12-21 21:30:39 -05:00
committed by GitHub
12 changed files with 555 additions and 91 deletions

View File

@@ -33,6 +33,10 @@ option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON)
option(NVBench_ENABLE_CUPTI "Build NVBench with CUPTI." ${cupti_default})
option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF)
option(NVBench_ENABLE_DEVICE_TESTING
"Include tests that require a GPU (with locked clocks)."
OFF
)
option(NVBench_ENABLE_EXAMPLES "Build NVBench examples." OFF)
include(cmake/NVBenchConfigTarget.cmake)

View File

@@ -19,12 +19,14 @@
#include <nvbench/detail/measure_cold.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/printer_base.cuh>
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/detail/ring_buffer.cuh>
#include <nvbench/detail/throw.cuh>
#include <fmt/format.h>
#include <algorithm>
@@ -43,7 +45,7 @@ measure_cold_base::measure_cold_base(state &exec_state)
, m_min_time{exec_state.get_min_time()}
, m_skip_time{exec_state.get_skip_time()}
, m_timeout{exec_state.get_timeout()}
{ }
{}
void measure_cold_base::check()
{
@@ -62,6 +64,113 @@ void measure_cold_base::check()
}
}
// Resets all per-measurement bookkeeping so a fresh set of trials can be
// recorded.
void measure_cold_base::initialize()
{
  // Discard any recorded per-sample history.
  m_cuda_times.clear();
  m_cpu_times.clear();
  m_noise_tracker.clear();

  // Zero the running sample count, time totals, and derived CPU noise.
  m_total_samples   = 0;
  m_total_cuda_time = 0.;
  m_total_cpu_time  = 0.;
  m_cpu_noise       = 0.;

  // No timeout has been observed yet.
  m_max_time_exceeded = false;
}
// Called once before the trial loop begins.
void measure_cold_base::run_trials_prologue()
{
  // Start wall-clock timing; is_finished() compares this timer's duration
  // against m_timeout.
  m_walltime_timer.start();
}
void measure_cold_base::record_measurements()
{
// Update and record timers and counters:
const auto cur_cuda_time = m_cuda_timer.get_duration();
const auto cur_cpu_time = m_cpu_timer.get_duration();
m_cuda_times.push_back(cur_cuda_time);
m_cpu_times.push_back(cur_cpu_time);
m_total_cuda_time += cur_cuda_time;
m_total_cpu_time += cur_cpu_time;
++m_total_samples;
// Compute convergence statistics using CUDA timings:
const auto mean_cuda_time = m_total_cuda_time /
static_cast<nvbench::float64_t>(m_total_samples);
const auto cuda_stdev =
nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
m_cuda_times.cend(),
mean_cuda_time);
auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
if (std::isfinite(cuda_rel_stdev))
{
m_noise_tracker.push_back(cuda_rel_stdev);
}
}
// Decides whether the trial loop should stop.
//
// Returns true when:
//  - the measurement is configured to run only once, or
//  - enough time/samples were gathered AND the latest noise is below
//    m_max_noise, or
//  - enough time/samples were gathered AND the noise itself has stabilized, or
//  - the wall-clock timeout was exceeded (also sets m_max_time_exceeded).
bool measure_cold_base::is_finished()
{
  if (m_run_once)
  {
    return true;
  }

  // Check that we've gathered enough samples. The noise-based exits below
  // additionally need at least one recorded noise value: the tracker stays
  // empty while the sample count is too small for a finite stdev (or while
  // the relative stdev is not finite), and calling back() on an empty ring
  // buffer is invalid.
  if (m_total_cuda_time > m_min_time && m_total_samples > m_min_samples &&
      !m_noise_tracker.empty())
  {
    const auto current_noise = m_noise_tracker.back();

    // Noise has dropped below threshold
    if (current_noise < m_max_noise)
    {
      return true;
    }

    // Check if the noise (cuda rel stdev) has converged by inspecting a
    // trailing window of recorded noise measurements.
    // This helps identify benchmarks that are inherently noisy and would
    // never converge to the target stdev threshold. This check ensures that the
    // benchmark will end if the stdev stabilizes above the target threshold.

    // Gather some iterations before checking noise, and limit how often we
    // check this.
    if (m_noise_tracker.size() > 64 && (m_total_samples % 16 == 0))
    {
      // Use the current noise as the stdev reference.
      const auto noise_stdev = nvbench::detail::statistics::standard_deviation(
        m_noise_tracker.cbegin(),
        m_noise_tracker.cend(),
        current_noise);
      const auto noise_rel_stdev = noise_stdev / current_noise;

      // If the rel stdev of the last N cuda noise measurements is less than
      // 5%, consider the result stable.
      const auto noise_threshold = 0.05;
      if (noise_rel_stdev < noise_threshold)
      {
        return true;
      }
    }
  }

  // Check for timeouts:
  m_walltime_timer.stop();
  if (m_walltime_timer.get_duration() > m_timeout)
  {
    m_max_time_exceeded = true;
    return true;
  }

  return false;
}
// Called once after the trial loop: computes the CPU noise (relative standard
// deviation of the CPU timings) and stops the wall-clock timer.
void measure_cold_base::run_trials_epilogue()
{
  // Only need to compute this at the end, not per iteration.
  // The mean must come from the CPU total (not the CUDA total), and
  // standard_deviation() expects the mean of the samples, not their sum.
  const auto cpu_mean = m_total_cpu_time /
                        static_cast<nvbench::float64_t>(m_total_samples);
  const auto cpu_stdev =
    nvbench::detail::statistics::standard_deviation(m_cpu_times.cbegin(),
                                                    m_cpu_times.cend(),
                                                    cpu_mean);
  m_cpu_noise = cpu_stdev / cpu_mean;

  m_walltime_timer.stop();
}
void measure_cold_base::generate_summaries()
{
const auto d_samples = static_cast<double>(m_total_samples);
@@ -113,7 +222,10 @@ void measure_cold_base::generate_summaries()
summ.set_string("description",
"Relative standard deviation of the cold GPU execution "
"time measurements.");
summ.set_float64("value", m_cuda_noise);
summ.set_float64("value",
m_noise_tracker.empty()
? std::numeric_limits<nvbench::float64_t>::infinity()
: m_noise_tracker.back());
}
if (const auto items = m_state.get_element_count(); items != 0)
@@ -161,16 +273,16 @@ void measure_cold_base::generate_summaries()
if (m_max_time_exceeded)
{
const auto timeout = m_timeout_timer.get_duration();
const auto timeout = m_walltime_timer.get_duration();
if (m_cuda_noise > m_max_noise)
if (!m_noise_tracker.empty() && m_noise_tracker.back() > m_max_noise)
{
printer.log(nvbench::log_level::warn,
fmt::format("Current measurement timed out ({:0.2f}s) "
"while over noise threshold ({:0.2f}% > "
"{:0.2f}%)",
timeout,
m_cuda_noise * 100,
m_noise_tracker.back() * 100,
m_max_noise * 100));
}
if (m_total_samples < m_min_samples)

View File

@@ -28,6 +28,7 @@
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
#include <nvbench/detail/l2flush.cuh>
#include <nvbench/detail/ring_buffer.cuh>
#include <nvbench/detail/statistics.cuh>
#include <cuda_runtime.h>
@@ -58,19 +59,11 @@ protected:
struct kernel_launch_timer;
void check();
void initialize()
{
m_total_cuda_time = 0.;
m_total_cpu_time = 0.;
m_cuda_noise = 0.;
m_cpu_noise = 0.;
m_total_samples = 0;
m_cuda_times.clear();
m_cpu_times.clear();
m_max_time_exceeded = false;
}
void initialize();
void run_trials_prologue();
void record_measurements();
bool is_finished();
void run_trials_epilogue();
void generate_summaries();
void check_skip_time(nvbench::float64_t warmup_time);
@@ -86,7 +79,6 @@ protected:
}
void block_stream();
__forceinline__ void unblock_stream() { m_blocker.unblock(); }
nvbench::state &m_state;
@@ -94,7 +86,7 @@ protected:
nvbench::launch m_launch;
nvbench::cuda_timer m_cuda_timer;
nvbench::cpu_timer m_cpu_timer;
nvbench::cpu_timer m_timeout_timer;
nvbench::cpu_timer m_walltime_timer;
nvbench::detail::l2flush m_l2flush;
nvbench::blocking_kernel m_blocker;
@@ -110,8 +102,10 @@ protected:
nvbench::int64_t m_total_samples{};
nvbench::float64_t m_total_cuda_time{};
nvbench::float64_t m_total_cpu_time{};
nvbench::float64_t m_cuda_noise{}; // rel stdev
nvbench::float64_t m_cpu_noise{}; // rel stdev
nvbench::float64_t m_cpu_noise{}; // rel stdev
// Trailing history of noise measurements for convergence tests
nvbench::detail::ring_buffer<nvbench::float64_t> m_noise_tracker{512};
std::vector<nvbench::float64_t> m_cuda_times;
std::vector<nvbench::float64_t> m_cpu_times;
@@ -170,7 +164,11 @@ struct measure_cold : public measure_cold_base
this->check();
this->initialize();
this->run_warmup();
this->run_trials_prologue();
this->run_trials();
this->run_trials_epilogue();
this->generate_summaries();
}
@@ -192,47 +190,12 @@ private:
void run_trials()
{
m_timeout_timer.start();
kernel_launch_timer<use_blocking_kernel> timer(*this);
do
{
this->launch_kernel(timer);
const auto cur_cuda_time = m_cuda_timer.get_duration();
const auto cur_cpu_time = m_cpu_timer.get_duration();
m_cuda_times.push_back(cur_cuda_time);
m_cpu_times.push_back(cur_cpu_time);
m_total_cuda_time += cur_cuda_time;
m_total_cpu_time += cur_cpu_time;
++m_total_samples;
// Only consider the cuda noise in the convergence criteria.
m_cuda_noise = nvbench::detail::compute_noise(m_cuda_times,
m_total_cuda_time);
m_timeout_timer.stop();
const auto total_time = m_timeout_timer.get_duration();
if (m_run_once)
{
break;
}
if (m_total_cuda_time > m_min_time && // Min time okay
m_total_samples > m_min_samples && // Min samples okay
m_cuda_noise < m_max_noise) // Noise okay
{
break;
}
if (total_time > m_timeout) // Max time exceeded, stop iterating.
{
m_max_time_exceeded = true;
break;
}
} while (true);
m_cpu_noise = nvbench::detail::compute_noise(m_cpu_times, m_total_cpu_time);
this->record_measurements();
} while (!this->is_finished());
}
template <typename TimerT>

View File

@@ -0,0 +1,129 @@
/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <nvbench/config.cuh>
#include <nvbench/detail/statistics.cuh>
#include <cassert>
#include <vector>
namespace nvbench::detail
{
/**
 * @brief A simple ring buffer whose capacity is fixed at construction.
 *
 * Storage is a `std::vector<T>` holding `capacity` value-initialized elements.
 * Once `capacity` values have been pushed, each further push overwrites the
 * oldest stored value.
 */
template <typename T>
struct ring_buffer
{
  /**
   * Create a new ring buffer with the requested capacity.
   *
   * A capacity of zero is allowed; such a buffer is always empty and
   * `push_back` discards its argument.
   */
  explicit ring_buffer(std::size_t capacity)
      : m_buffer(capacity)
  {}

  /**
   * Iterators provide all values in the ring buffer in unspecified order.
   * The range always covers exactly `size()` elements.
   * @{
   */
  // clang-format off
  [[nodiscard]] auto begin()        { return m_buffer.begin();  }
  [[nodiscard]] auto begin()  const { return m_buffer.begin();  }
  [[nodiscard]] auto cbegin() const { return m_buffer.cbegin(); }
  [[nodiscard]] auto end()          { return m_buffer.begin()  + this->size(); }
  [[nodiscard]] auto end()    const { return m_buffer.begin()  + this->size(); }
  [[nodiscard]] auto cend()   const { return m_buffer.cbegin() + this->size(); }
  // clang-format on
  /** @} */

  /**
   * The number of valid values in the ring buffer. Always <= capacity().
   */
  [[nodiscard]] std::size_t size() const
  {
    return m_full ? m_buffer.size() : m_index;
  }

  /**
   * The maximum size of the ring buffer.
   */
  [[nodiscard]] std::size_t capacity() const { return m_buffer.size(); }

  /**
   * @return True if the ring buffer is empty.
   */
  [[nodiscard]] bool empty() const { return m_index == 0 && !m_full; }

  /**
   * Remove all values from the buffer without modifying capacity.
   * The underlying storage is not overwritten; only the bookkeeping is reset.
   */
  void clear()
  {
    m_index = 0;
    m_full  = false;
  }

  /**
   * Add a new value to the ring buffer. If size() == capacity(), the oldest
   * element in the buffer is overwritten. For a zero-capacity buffer the
   * value is discarded.
   */
  void push_back(T val)
  {
    if (m_buffer.empty())
    { // Nothing can be stored; also avoids the modulo-by-zero below.
      return;
    }

    assert(m_index < m_buffer.size());

    m_buffer[m_index] = val;

    m_index = (m_index + 1) % m_buffer.size();
    if (m_index == 0)
    { // buffer wrapped
      m_full = true;
    }
  }

  /**
   * Get the most recently added value (returned by value).
   * The buffer must not be empty.
   * @{
   */
  [[nodiscard]] auto back() const
  {
    assert(!this->empty());
    return m_buffer[this->last_index()];
  }
  [[nodiscard]] auto back()
  {
    assert(!this->empty());
    return m_buffer[this->last_index()];
  }
  /**@}*/

private:
  // Index of the slot written by the most recent push_back. m_index points at
  // the next slot to write, so step back by one, wrapping at the front.
  [[nodiscard]] std::size_t last_index() const
  {
    return m_index == 0 ? m_buffer.size() - 1 : m_index - 1;
  }

  std::vector<T> m_buffer;
  std::size_t m_index{0};
  bool m_full{false};
};
} // namespace nvbench::detail

View File

@@ -18,48 +18,47 @@
#pragma once
#include <nvbench/types.cuh>
#include <nvbench/detail/transform_reduce.cuh>
#include <cmath>
#include <functional>
#include <iterator>
#include <limits>
#include <numeric>
#include <vector>
#include <type_traits>
namespace nvbench::detail
namespace nvbench::detail::statistics
{
/**
* Given a vector of samples and the precomputed sum of all samples in the
* vector, return a measure of the noise in the samples.
* Computes and returns the unbiased sample standard deviation.
*
* The noise metric is the relative unbiased sample standard deviation
* (std_dev / mean).
* If the input has fewer than 5 samples, infinity is returned.
*/
inline nvbench::float64_t
compute_noise(const std::vector<nvbench::float64_t> &data,
nvbench::float64_t sum)
template <typename Iter,
typename ValueType = typename std::iterator_traits<Iter>::value_type>
ValueType standard_deviation(Iter first, Iter last, ValueType mean)
{
const auto num = static_cast<nvbench::float64_t>(data.size());
static_assert(std::is_floating_point_v<ValueType>);
const auto num = last - first;
if (num < 5) // don't bother with low sample sizes.
{
return std::numeric_limits<nvbench::float64_t>::infinity();
return std::numeric_limits<ValueType>::infinity();
}
const auto mean = sum / num;
const auto variance =
nvbench::detail::transform_reduce(data.cbegin(),
data.cend(),
0.,
std::plus<>{},
[mean](nvbench::float64_t val) {
val -= mean;
val *= val;
return val;
}) /
(num - 1);
const auto abs_stdev = std::sqrt(variance);
return abs_stdev / mean;
const auto variance = nvbench::detail::transform_reduce(first,
last,
ValueType{},
std::plus<>{},
[mean](auto val) {
val -= mean;
val *= val;
return val;
}) /
static_cast<ValueType>((num - 1));
return std::sqrt(variance);
}
} // namespace nvbench::detail
} // namespace nvbench::detail::statistics

View File

@@ -18,6 +18,8 @@
#pragma once
#include <utility>
// Many compilers still don't ship transform_reduce with their STLs, so here's
// a naive implementation that will work everywhere. This is never used in a
// critical section, so perf isn't a concern.

View File

@@ -128,7 +128,7 @@ const summary &state::get_summary(std::string_view name) const
[&name](const auto &s) { return s.get_name() == name; });
if (iter == m_summaries.cend())
{
NVBENCH_THROW(std::runtime_error, "No summary named '{}'.", name);
NVBENCH_THROW(std::invalid_argument, "No summary named '{}'.", name);
}
return *iter;
}
@@ -140,7 +140,7 @@ summary &state::get_summary(std::string_view name)
[&name](auto &s) { return s.get_name() == name; });
if (iter == m_summaries.end())
{
NVBENCH_THROW(std::runtime_error, "No summary named '{}'.", name);
NVBENCH_THROW(std::invalid_argument, "No summary named '{}'.", name);
}
return *iter;
}

View File

@@ -262,10 +262,13 @@ def main():
to_compare = []
if os.path.isdir(files_or_dirs[0]) and os.path.isdir(files_or_dirs[1]):
for f in os.listdir(files_or_dirs[1]):
if os.path.splitext(f)[1] != ".json":
continue
r = os.path.join(files_or_dirs[0], f)
c = os.path.join(files_or_dirs[1], f)
if os.path.isfile(r) and os.path.isfile(c):
to_compare.append( (r,c) )
if os.path.isfile(r) and os.path.isfile(c) and \
os.path.getsize(r) > 0 and os.path.getsize(c) > 0:
to_compare.append((r, c))
else:
to_compare = [(files_or_dirs[0],files_or_dirs[1])]

View File

@@ -10,6 +10,7 @@ set(test_srcs
named_values.cu
option_parser.cu
range.cu
ring_buffer.cu
runner.cu
state.cu
state_generator.cu
@@ -36,3 +37,4 @@ foreach(test_src IN LISTS test_srcs)
endforeach()
add_subdirectory(cmake)
add_subdirectory(device)

View File

@@ -0,0 +1,14 @@
# Device test: checks that a noisy benchmark converges to an accurate
# mean + stdev without timing out.
set(test_name nvbench.test.device.noisy_bench)

# The executable is always built and attached to the aggregate test target.
add_executable(${test_name} noisy_bench.cu)
target_link_libraries(${test_name} PRIVATE nvbench::main fmt)
nvbench_config_target(${test_name})
add_dependencies(nvbench.test.all ${test_name})

# The test is only registered with ctest when device testing is enabled.
if (NVBench_ENABLE_DEVICE_TESTING)
  add_test(NAME ${test_name} COMMAND "$<TARGET_FILE:${test_name}>")
  # Any timeouts/warnings in the test output are hard failures for this test.
  set_property(TEST ${test_name} PROPERTY
    FAIL_REGULAR_EXPRESSION "Warn" "timed out"
  )
endif()

View File

@@ -0,0 +1,146 @@
/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/nvbench.cuh>
#include <nvbench/test_kernels.cuh>
#include <fmt/format.h>
#include <algorithm>
#include <limits>
#include <random>
#include <stdexcept>
/**
 * Benchmark that launches a sleep kernel whose duration is drawn from a normal
 * distribution described by the "Mean" (ms) and "Noise" (%) axes, then compares
 * the mean/stdev measured by NVBench against the requested values and prints a
 * small report table.
 *
 * Lines starting with "Warn" are printed when a measured value is outside its
 * tolerance; the ctest registration for this test lists "Warn" in its
 * FAIL_REGULAR_EXPRESSION.
 */
void noisy_bench(nvbench::state &state)
{
  // Requested mean time: the axis is in ms, everything below uses seconds.
  const auto mean_ms = static_cast<nvbench::float32_t>(state.get_float64("Mean"));
  const auto mean    = mean_ms / 1000.f;

  // Requested relative stdev: the axis is in percent, convert to a fraction.
  const auto noise_pct =
    static_cast<nvbench::float32_t>(state.get_float64("Noise"));
  const auto noise = noise_pct / 100.f;

  // Requested absolute stdev, in seconds.
  const auto stdev = noise * mean;

  // Default-seeded generator feeding the sleep-duration distribution.
  std::minstd_rand rng{};
  std::normal_distribution<nvbench::float32_t> dist(mean, stdev);

  // cold tag will save time by disabling batch measurements
  state.exec(nvbench::exec_tag::impl::cold, [&](nvbench::launch &launch) {
    const auto seconds = dist(rng);
    nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(seconds);
  });

  // Read back what NVBench measured.
  const auto measured_mean = static_cast<nvbench::float32_t>(
    state.get_summary("Average GPU Time (Cold)").get_float64("value"));

  // If the noise lookup throws std::invalid_argument, treat the noise as
  // infinite.
  auto measured_noise = std::numeric_limits<nvbench::float32_t>::infinity();
  try
  {
    measured_noise = static_cast<nvbench::float32_t>(
      state.get_summary("GPU Relative Standard Deviation (Cold)")
        .get_float64("value"));
  }
  catch (std::invalid_argument &)
  {
    // Keep the infinity fallback.
  }
  const auto measured_stdev = measured_noise * measured_mean;

  // Absolute errors between requested and measured values.
  const auto mean_error  = std::fabs(measured_mean - mean);
  const auto stdev_error = std::fabs(measured_stdev - stdev);
  const auto noise_error = std::fabs(measured_noise - noise);

  // Tolerances: relative, with an absolute floor for very short kernels.
  const auto mean_threshold  = std::max(0.025f * mean, 8e-6f); // 2.5% or 8us
  const auto stdev_threshold = std::max(0.05f * stdev, 5e-6f); // 5% or 5us

  const auto mean_pass  = mean_error < mean_threshold;
  const auto stdev_pass = stdev_error < stdev_threshold;

  // Flag column contents: empty when within tolerance.
  const char *mean_flag  = mean_pass ? "" : "!!!!";
  const char *stdev_flag = stdev_pass ? "" : "!!!!";

  // Table header:
  fmt::print("| {:^5} "
             "| {:^12} | {:^12} "
             "| {:^12} | {:^12} | {:^4} |\n",
             "",
             "Expected",
             "Measured",
             "Error",
             "Threshold",
             "Flag");

  // Header separator:
  fmt::print("|{:-^7}"
             "|{:-^14}|{:-^14}"
             "|{:-^14}|{:-^14}|{:-^6}|\n",
             "",
             "",
             "",
             "",
             "",
             "");

  // Table body; times are scaled s -> ms and noise is scaled to percent.
  fmt::print("| Mean "
             "| {:>9.6f} ms | {:>9.6f} ms "
             "| {:>9.6f} ms | {:>9.6f} ms | {:4} |\n"
             "| Stdev "
             "| {:>9.6f} ms | {:>9.6f} ms "
             "| {:>9.6f} ms | {:>9.6f} ms | {:4} |\n"
             "| Noise "
             "| {:>9.6f}% | {:>9.6f}% "
             "| {:>9.6f}% | {:5} | {:4} |\n",
             mean * 1000,
             measured_mean * 1000,
             mean_error * 1000,
             mean_threshold * 1000,
             mean_flag,
             stdev * 1000,
             measured_stdev * 1000,
             stdev_error * 1000,
             stdev_threshold * 1000,
             stdev_flag,
             noise * 100,
             measured_noise * 100,
             noise_error * 100,
             "",
             "");

  if (!mean_pass)
  {
    // Printed directly rather than through the NVBench logger; the "Warn"
    // prefix is what the ctest output regex for this test looks for.
    fmt::print("Warn: Mean error exceeds threshold: ({:.3} ms > {:.3} ms)\n",
               mean_error * 1000,
               mean_threshold * 1000);
  }

  if (!stdev_pass)
  {
    // Printed directly rather than through the NVBench logger; the "Warn"
    // prefix is what the ctest output regex for this test looks for.
    fmt::print("Warn: Stdev error exceeds threshold: "
               "({:.6} ms > {:.6} ms, noise: {:.3}%)\n",
               stdev_error * 1000,
               stdev_threshold * 1000,
               measured_noise * 100);
  }
}
// Register noisy_bench with two float64 axes:
//  - "Mean":  requested kernel duration, in milliseconds.
//  - "Noise": requested relative standard deviation, in percent.
NVBENCH_BENCH(noisy_bench)
.add_float64_axis("Mean", {0.05, 0.1, 0.5, 1.0, 10.0}) // ms
.add_float64_axis("Noise", {0.1, 5., 25.}) // %
// Set the max-noise target far below every requested noise level so the
// "noise below threshold" exit is effectively disabled; we want to test that
// the benchmarking loop will still exit when max_noise is never reached:
.set_max_noise(0.0000001);

90
testing/ring_buffer.cu Normal file
View File

@@ -0,0 +1,90 @@
/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/detail/ring_buffer.cuh>
#include "test_asserts.cuh"
#include <algorithm>
#include <vector>
/**
 * Compares the valid contents of a ring buffer against a reference vector.
 *
 * Only the first `buffer.size()` elements of `reference` are compared, in the
 * buffer's iteration order; any extra trailing elements in `reference` are
 * ignored. An empty buffer therefore compares equal to any reference.
 *
 * @return True if every valid buffer element matches the corresponding
 * reference element. False if they differ, or if `reference` holds fewer
 * elements than the buffer.
 */
template <typename T>
bool equal(const nvbench::detail::ring_buffer<T> &buffer,
           const std::vector<T> &reference)
{
  // The three-iterator std::equal reads buffer.size() elements from
  // reference; guard against running past the end of a shorter reference.
  if (reference.size() < buffer.size())
  {
    return false;
  }
  return std::equal(buffer.cbegin(), buffer.cend(), reference.cbegin());
}
// Exercises nvbench::detail::ring_buffer with a capacity of 3: checks size(),
// capacity(), empty(), back(), the iterator range, wrap-around overwrite, and
// clear(). Returns 0 on success; if a std::exception escapes, its message is
// printed to stderr and 1 is returned.
int main()
try
{
// Freshly constructed: nothing stored yet.
nvbench::detail::ring_buffer<int> avg(3);
ASSERT(avg.capacity() == 3);
ASSERT(avg.size() == 0);
ASSERT(avg.empty());
// The iterator range spans size() == 0 elements here, so this comparison
// examines no elements.
ASSERT(equal(avg, {0, 0, 0}));
// First push: one valid element. equal() compares only the first size()
// entries of the reference list.
avg.push_back(32);
ASSERT(!avg.empty());
ASSERT(avg.size() == 1);
ASSERT(avg.capacity() == 3);
ASSERT_MSG(avg.back() == 32, " (got {})", avg.back());
ASSERT(equal(avg, {32, 0, 0}));
// Second push: two valid elements.
avg.push_back(2);
ASSERT(avg.size() == 2);
ASSERT(avg.capacity() == 3);
ASSERT_MSG(avg.back() == 2, " (got {})", avg.back());
ASSERT(equal(avg, {32, 2, 0}));
// Third push fills the buffer (size() == capacity()).
avg.push_back(-15);
ASSERT(avg.size() == 3);
ASSERT(avg.capacity() == 3);
ASSERT_MSG(avg.back() == -15, " (got {})", avg.back());
ASSERT(equal(avg, {32, 2, -15}));
// Fourth push wraps around: the oldest value (32) in slot 0 is overwritten.
avg.push_back(5);
ASSERT(avg.size() == 3);
ASSERT(avg.capacity() == 3);
ASSERT_MSG(avg.back() == 5, " (got {})", avg.back());
ASSERT(equal(avg, {5, 2, -15}));
// Fifth push overwrites slot 1 (previously 2).
avg.push_back(0);
ASSERT(avg.size() == 3);
ASSERT(avg.capacity() == 3);
ASSERT(equal(avg, {5, 0, -15}));
ASSERT_MSG(avg.back() == 0, " (got {})", avg.back());
// Sixth push overwrites slot 2 (previously -15).
avg.push_back(128);
ASSERT(avg.size() == 3);
ASSERT(avg.capacity() == 3);
ASSERT(equal(avg, {5, 0, 128}));
ASSERT_MSG(avg.back() == 128, " (got {})", avg.back());
// clear() empties the buffer but leaves the capacity unchanged.
avg.clear();
ASSERT(avg.empty());
ASSERT(avg.size() == 0);
ASSERT(avg.capacity() == 3);
return 0;
}
catch (std::exception &err)
{
// Report the exception message on stderr and signal failure via the exit code.
fmt::print(stderr, "{}", err.what());
return 1;
}