diff --git a/.clangd b/.clangd new file mode 100644 index 0000000..1649e16 --- /dev/null +++ b/.clangd @@ -0,0 +1,63 @@ +# https://clangd.llvm.org/config + +# Apply a config conditionally to all C files +If: + PathMatch: .*\.(c|h)$ + +--- + +# Apply a config conditionally to all C++ files +If: + PathMatch: .*\.(c|h)pp + +--- + +# Apply a config conditionally to all CUDA files +If: + PathMatch: .*\.cuh? +CompileFlags: + Add: + # Allow variadic CUDA functions + - "-Xclang=-fcuda-allow-variadic-functions" + +--- + +# Tweak the clangd parse settings for all files +CompileFlags: + Compiler: clang++ + CompilationDatabase: . + Add: + - -x + - cuda + # report all errors + - "-ferror-limit=0" + - "-ftemplate-backtrace-limit=0" + - "-stdlib=libc++" + - "-std=c++17" + Remove: + # strip CUDA fatbin args + - "-Xfatbin*" + - "-Xcompiler*" + - "-Xcudafe*" + - "-rdc=*" + - "-gpu=*" + - "--diag_suppress*" + # strip CUDA arch flags + - "-gencode*" + - "--generate-code*" + # strip gcc's -fcoroutines + - -fcoroutines + # strip CUDA flags unknown to clang + - "-ccbin*" + - "--compiler-options*" + - "--expt-extended-lambda" + - "--expt-relaxed-constexpr" + - "-forward-unknown-to-host-compiler" + - "-Werror=cross-execution-space-call" +Diagnostics: + Suppress: + - "variadic_device_fn" + - "attributes_not_allowed" + # The NVHPC version of _NVCXX_EXPAND_PACK macro triggers this clang error. + # Temporarily suppressing it, but should probably fix + - "template_param_shadow" diff --git a/docs/cli_help.md b/docs/cli_help.md index 8629e8f..0336c5e 100644 --- a/docs/cli_help.md +++ b/docs/cli_help.md @@ -89,8 +89,15 @@ * Applies to the most recent `--benchmark`, or all benchmarks if specified before any `--benchmark` arguments. +* `--stopping-criterion <criterion>` + * After `--min-samples` is satisfied, use `<criterion>` to detect if enough + samples were collected. + * Only applies to Cold measurements. 
+ * Default is stdrel (`--stopping-criterion stdrel`) + * `--min-time <seconds>` * Accumulate at least `<seconds>` of execution time per measurement. + * Only applies to `stdrel` stopping criterion. * Default is 0.5 seconds. * If both GPU and CPU times are gathered, this applies to GPU time only. * Applies to the most recent `--benchmark`, or all benchmarks if specified @@ -100,6 +107,7 @@ * Gather samples until the error in the measurement drops below `<value>`. * Noise is specified as the percent relative standard deviation. * Default is 0.5% (`--max-noise 0.5`) + * Only applies to `stdrel` stopping criterion. * Only applies to Cold measurements. * If both GPU and CPU times are gathered, this applies to GPU noise only. * Applies to the most recent `--benchmark`, or all benchmarks if specified diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 219fc89..d1a5e14 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,12 +1,13 @@ set(example_srcs + auto_throughput.cu axes.cu + custom_criterion.cu enums.cu exec_tag_sync.cu exec_tag_timer.cu skip.cu stream.cu throughput.cu - auto_throughput.cu ) # Metatarget for all examples: diff --git a/examples/custom_criterion.cu b/examples/custom_criterion.cu new file mode 100644 index 0000000..4dbee9e --- /dev/null +++ b/examples/custom_criterion.cu @@ -0,0 +1,80 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +// Grab some testing kernels from NVBench: +#include + +// Thrust vectors simplify memory management: +#include + +// Inherit from the stopping_criterion_base class: +class fixed_criterion final : public nvbench::stopping_criterion_base +{ + nvbench::int64_t m_num_samples{}; + +public: + fixed_criterion() + : nvbench::stopping_criterion_base{"fixed", {{"max-samples", nvbench::int64_t{42}}}} + {} + +protected: + // Set up the criterion in the `do_initialize()` method: + virtual void do_initialize() override + { + m_num_samples = 0; + } + + // Process new measurements in the `do_add_measurement()` method: + virtual void do_add_measurement(nvbench::float64_t /* measurement */) override + { + m_num_samples++; + } + + // Check if the stopping criterion is met in the `do_is_finished()` method: + virtual bool do_is_finished() override + { + return m_num_samples >= m_params.get_int64("max-samples"); + } + +}; + +// Register the criterion with NVBench: +NVBENCH_REGISTER_CRITERION(fixed_criterion); + +void throughput_bench(nvbench::state &state) +{ + // Allocate input data: + const std::size_t num_values = 64 * 1024 * 1024 / sizeof(nvbench::int32_t); + thrust::device_vector input(num_values); + thrust::device_vector output(num_values); + + // Provide throughput information: + state.add_element_count(num_values, "NumElements"); + state.add_global_memory_reads(num_values, "DataSize"); + state.add_global_memory_writes(num_values); + + state.exec(nvbench::exec_tag::no_batch, [&input, &output, num_values](nvbench::launch &launch) { + nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>( + thrust::raw_pointer_cast(input.data()), + thrust::raw_pointer_cast(output.data()), + num_values); + }); +} +NVBENCH_BENCH(throughput_bench).set_stopping_criterion("fixed"); diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index 938ed6a..182843c 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -4,6 +4,7 @@ set(srcs benchmark_base.cxx 
benchmark_manager.cxx blocking_kernel.cu + criterion_manager.cxx csv_printer.cu cuda_call.cu device_info.cu @@ -17,13 +18,16 @@ set(srcs printer_multiplex.cxx runner.cxx state.cxx + stopping_criterion.cxx string_axis.cxx type_axis.cxx type_strings.cxx + detail/entropy_criterion.cxx detail/measure_cold.cu detail/measure_hot.cu detail/state_generator.cxx + detail/stdrel_criterion.cxx internal/nvml.cxx ) diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index 723afcd..55673b0 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include // reference_wrapper, ref #include @@ -181,22 +182,30 @@ struct benchmark_base } /// @} - /// Accumulate at least this many seconds of timing data per measurement. @{ - [[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; } + /// Accumulate at least this many seconds of timing data per measurement. + /// Only applies to `stdrel` stopping criterion. @{ + [[nodiscard]] nvbench::float64_t get_min_time() const + { + return m_criterion_params.get_float64("min-time"); + } benchmark_base &set_min_time(nvbench::float64_t min_time) { - m_min_time = min_time; + m_criterion_params.set_float64("min-time", min_time); return *this; } /// @} /// Specify the maximum amount of noise if a measurement supports noise. /// Noise is the relative standard deviation: - /// `noise = stdev / mean_time`. @{ - [[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; } + /// `noise = stdev / mean_time`. + /// Only applies to `stdrel` stopping criterion. 
@{ + [[nodiscard]] nvbench::float64_t get_max_noise() const + { + return m_criterion_params.get_float64("max-noise"); + } benchmark_base &set_max_noise(nvbench::float64_t max_noise) { - m_max_noise = max_noise; + m_criterion_params.set_float64("max-noise", max_noise); return *this; } /// @} @@ -230,6 +239,19 @@ struct benchmark_base } /// @} + [[nodiscard]] nvbench::criterion_params& get_criterion_params() { return m_criterion_params; } + [[nodiscard]] const nvbench::criterion_params& get_criterion_params() const { return m_criterion_params; } + + /// Control the stopping criterion for the measurement loop. + /// @{ + [[nodiscard]] const std::string& get_stopping_criterion() const { return m_stopping_criterion; } + benchmark_base &set_stopping_criterion(std::string criterion) + { + m_stopping_criterion = std::move(criterion); + return *this; + } + /// @} + protected: friend struct nvbench::runner_base; @@ -247,12 +269,13 @@ protected: bool m_disable_blocking_kernel{false}; nvbench::int64_t m_min_samples{10}; - nvbench::float64_t m_min_time{0.5}; - nvbench::float64_t m_max_noise{0.005}; // 0.5% relative standard deviation nvbench::float64_t m_skip_time{-1.}; nvbench::float64_t m_timeout{15.}; + nvbench::criterion_params m_criterion_params; + std::string m_stopping_criterion{"stdrel"}; + private: // route these through virtuals so the templated subclass can inject type info virtual std::unique_ptr do_clone() const = 0; diff --git a/nvbench/benchmark_base.cxx b/nvbench/benchmark_base.cxx index 5121a52..6e89fd3 100644 --- a/nvbench/benchmark_base.cxx +++ b/nvbench/benchmark_base.cxx @@ -34,13 +34,14 @@ std::unique_ptr benchmark_base::clone() const result->m_axes = m_axes; result->m_devices = m_devices; - result->m_min_samples = m_min_samples; - result->m_min_time = m_min_time; - result->m_max_noise = m_max_noise; + result->m_min_samples = m_min_samples; + result->m_criterion_params = m_criterion_params; result->m_skip_time = m_skip_time; result->m_timeout = m_timeout; 
+ result->m_stopping_criterion = m_stopping_criterion; + return result; } diff --git a/nvbench/criterion_manager.cuh b/nvbench/criterion_manager.cuh new file mode 100644 index 0000000..6c60993 --- /dev/null +++ b/nvbench/criterion_manager.cuh @@ -0,0 +1,65 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include + +#include + +namespace nvbench +{ + +class criterion_manager +{ + std::unordered_map> m_map; + + criterion_manager(); + +public: + /** + * @return The singleton criterion_manager instance. + */ + static criterion_manager& get(); + + /** + * Register a new stopping criterion. + */ + nvbench::stopping_criterion_base& add(std::unique_ptr criterion); + nvbench::stopping_criterion_base& get_criterion(const std::string& name); + const nvbench::stopping_criterion_base& get_criterion(const std::string& name) const; + + using params_description = std::vector>; + params_description get_params_description() const; +}; + +/** + * Given a stopping criterion type `TYPE`, registers it in the criterion manager + * + * See the `custom_criterion.cu` example for usage. 
+ */ +#define NVBENCH_REGISTER_CRITERION(TYPE) \ + static nvbench::stopping_criterion_base &NVBENCH_UNIQUE_IDENTIFIER(TYPE) = \ + nvbench::criterion_manager::get().add(std::make_unique()) + +} // namespace nvbench diff --git a/nvbench/criterion_manager.cxx b/nvbench/criterion_manager.cxx new file mode 100644 index 0000000..6ba27f6 --- /dev/null +++ b/nvbench/criterion_manager.cxx @@ -0,0 +1,100 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace nvbench +{ + +criterion_manager::criterion_manager() +{ + m_map.emplace("stdrel", std::make_unique()); + m_map.emplace("entropy", std::make_unique()); +} + +criterion_manager &criterion_manager::get() +{ + static criterion_manager registry; + return registry; +} + +stopping_criterion_base& criterion_manager::get_criterion(const std::string& name) +{ + auto iter = m_map.find(name); + if (iter == m_map.end()) + { + NVBENCH_THROW(std::runtime_error, "No stopping criterion named \"{}\".", name); + } + return *iter->second.get(); +} + +const nvbench::stopping_criterion_base& criterion_manager::get_criterion(const std::string& name) const +{ + auto iter = m_map.find(name); + if (iter == m_map.end()) + { + NVBENCH_THROW(std::runtime_error, "No stopping criterion named \"{}\".", name); + } + return *iter->second.get(); +} + +stopping_criterion_base &criterion_manager::add(std::unique_ptr criterion) +{ + const std::string name = criterion->get_name(); + + auto [it, success] = m_map.emplace(name, std::move(criterion)); + + if (!success) + { + NVBENCH_THROW(std::runtime_error, + "Stopping criterion \"{}\" is already registered.", name); + } + + return *it->second.get(); +} + +nvbench::criterion_manager::params_description criterion_manager::get_params_description() const +{ + nvbench::criterion_manager::params_description desc; + + for (auto &[criterion_name, criterion] : m_map) + { + nvbench::criterion_params params = criterion->get_params(); + + for (auto param : params.get_names()) + { + nvbench::named_values::type type = params.get_type(param); + if (std::find_if(desc.begin(), desc.end(), [&](auto d) { + return d.first == param && d.second != type; + }) != desc.end()) + { + NVBENCH_THROW(std::runtime_error, + "Stopping criterion \"{}\" parameter \"{}\" is already used by another " + "criterion with a different type.", + criterion_name, + param); + } + desc.emplace_back(param, type); + } + } + + return desc; +} + +} // namespace 
nvbench diff --git a/nvbench/detail/entropy_criterion.cuh b/nvbench/detail/entropy_criterion.cuh new file mode 100644 index 0000000..b0e4ebe --- /dev/null +++ b/nvbench/detail/entropy_criterion.cuh @@ -0,0 +1,55 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +namespace nvbench::detail +{ + +class entropy_criterion final : public stopping_criterion_base +{ + // state + nvbench::int64_t m_total_samples{}; + nvbench::float64_t m_total_cuda_time{}; + std::vector> m_freq_tracker; + + // TODO The window size should be user-configurable + nvbench::detail::ring_buffer m_entropy_tracker{299}; + + // Used to avoid re-allocating temporary memory + std::vector m_probabilities; + + nvbench::float64_t compute_entropy(); + +public: + entropy_criterion(); + +protected: + virtual void do_initialize() override; + virtual void do_add_measurement(nvbench::float64_t measurement) override; + virtual bool do_is_finished() override; + +}; + +} // namespace nvbench::detail diff --git a/nvbench/detail/entropy_criterion.cxx b/nvbench/detail/entropy_criterion.cxx new file mode 100644 index 0000000..6d9ba8c --- /dev/null +++ b/nvbench/detail/entropy_criterion.cxx @@ -0,0 +1,137 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use 
this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + + +namespace nvbench::detail +{ + +entropy_criterion::entropy_criterion() + : stopping_criterion_base{"entropy", {{"max-angle", 0.048}, {"min-r2", 0.36}}} +{ + m_freq_tracker.reserve(m_entropy_tracker.capacity() * 2); + m_probabilities.reserve(m_entropy_tracker.capacity() * 2); +} + +void entropy_criterion::do_initialize() +{ + m_total_samples = 0; + m_total_cuda_time = 0.0; + m_entropy_tracker.clear(); + m_freq_tracker.clear(); +} + +nvbench::float64_t entropy_criterion::compute_entropy() +{ + const std::size_t n = m_freq_tracker.size(); + if (n == 0) + { + return 0.0; + } + + m_probabilities.resize(n); + for (std::size_t i = 0; i < n; i++) + { + m_probabilities[i] = static_cast(m_freq_tracker[i].second) / + static_cast(m_total_samples); + } + + nvbench::float64_t entropy{}; + for (nvbench::float64_t p : m_probabilities) + { + entropy -= p * std::log2(p); + } + + return entropy; +} + +void entropy_criterion::do_add_measurement(nvbench::float64_t measurement) +{ + m_total_samples++; + m_total_cuda_time += measurement; + + { + auto key = measurement; + constexpr bool bin_keys = false; + + if (bin_keys) + { + const auto resolution_us = 0.5; + const auto resulution_s = resolution_us / 1'000'000; + const auto epsilon = resulution_s * 2; + key = std::round(key / epsilon) * epsilon; + } + + // This approach is about 3x faster than `std::{unordered_,}map` + // Up to 100k samples, only about 20% slower than corresponding stdrel method + auto it = 
std::lower_bound(m_freq_tracker.begin(), + m_freq_tracker.end(), + std::make_pair(key, nvbench::int64_t{})); + + if (it != m_freq_tracker.end() && it->first == key) + { + it->second += 1; + } + else + { + m_freq_tracker.insert(it, std::make_pair(key, nvbench::int64_t{1})); + } + } + + m_entropy_tracker.push_back(compute_entropy()); +} + +bool entropy_criterion::do_is_finished() +{ + if (m_entropy_tracker.size() < 2) + { + return false; + } + + // Even number of samples is used to reduce the overhead and not required to compute entropy. + // This makes `is_finished()` about 20% faster than corresponding stdrel method. + if (m_total_samples % 2 != 0) + { + return false; + } + + auto begin = m_entropy_tracker.cbegin(); + auto end = m_entropy_tracker.cend(); + auto mean = statistics::compute_mean(begin, end); + + const auto [slope, intercept] = statistics::compute_linear_regression(begin, end, mean); + + if (statistics::slope2deg(slope) > m_params.get_float64("max-angle")) + { + return false; + } + + const auto r2 = statistics::compute_r2(begin, end, mean, slope, intercept); + if (r2 < m_params.get_float64("min-r2")) + { + return false; + } + + return true; +} + +} // namespace nvbench::detail diff --git a/nvbench/detail/measure_cold.cu b/nvbench/detail/measure_cold.cu index 69ceb7e..16866a5 100644 --- a/nvbench/detail/measure_cold.cu +++ b/nvbench/detail/measure_cold.cu @@ -16,38 +16,37 @@ * limitations under the License. 
*/ -#include - #include +#include +#include +#include #include #include #include #include -#include -#include - #include -#include -#include -#include -#include - namespace nvbench::detail { measure_cold_base::measure_cold_base(state &exec_state) : m_state{exec_state} , m_launch{m_state.get_cuda_stream()} + , m_criterion_params{exec_state.get_criterion_params()} + , m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(exec_state.get_stopping_criterion())} , m_run_once{exec_state.get_run_once()} , m_no_block{exec_state.get_disable_blocking_kernel()} , m_min_samples{exec_state.get_min_samples()} - , m_max_noise{exec_state.get_max_noise()} - , m_min_time{exec_state.get_min_time()} , m_skip_time{exec_state.get_skip_time()} , m_timeout{exec_state.get_timeout()} -{} +{ + if (m_min_samples > 0) + { + m_cuda_times.reserve(m_min_samples); + m_cpu_times.reserve(m_min_samples); + } +} void measure_cold_base::check() { @@ -68,10 +67,11 @@ void measure_cold_base::initialize() m_total_cpu_time = 0.; m_cpu_noise = 0.; m_total_samples = 0; - m_noise_tracker.clear(); m_cuda_times.clear(); m_cpu_times.clear(); m_max_time_exceeded = false; + + m_stopping_criterion.initialize(m_criterion_params); } void measure_cold_base::run_trials_prologue() { m_walltime_timer.start(); } @@ -87,16 +87,7 @@ void measure_cold_base::record_measurements() m_total_cpu_time += cur_cpu_time; ++m_total_samples; - // Compute convergence statistics using CUDA timings: - const auto mean_cuda_time = m_total_cuda_time / static_cast(m_total_samples); - const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(), - m_cuda_times.cend(), - mean_cuda_time); - auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; - if (std::isfinite(cuda_rel_stdev)) - { - m_noise_tracker.push_back(cuda_rel_stdev); - } + m_stopping_criterion.add_measurement(cur_cuda_time); } bool measure_cold_base::is_finished() @@ -107,39 +98,12 @@ bool measure_cold_base::is_finished() } // Check that 
we've gathered enough samples: - if (m_total_cuda_time > m_min_time && m_total_samples > m_min_samples) + if (m_total_samples > m_min_samples) { - // Noise has dropped below threshold - if (m_noise_tracker.back() < m_max_noise) + if (m_stopping_criterion.is_finished()) { return true; } - - // Check if the noise (cuda rel stdev) has converged by inspecting a - // trailing window of recorded noise measurements. - // This helps identify benchmarks that are inherently noisy and would - // never converge to the target stdev threshold. This check ensures that the - // benchmark will end if the stdev stabilizes above the target threshold. - // Gather some iterations before checking noise, and limit how often we - // check this. - if (m_noise_tracker.size() > 64 && (m_total_samples % 16 == 0)) - { - // Use the current noise as the stdev reference. - const auto current_noise = m_noise_tracker.back(); - const auto noise_stdev = - nvbench::detail::statistics::standard_deviation(m_noise_tracker.cbegin(), - m_noise_tracker.cend(), - current_noise); - const auto noise_rel_stdev = noise_stdev / current_noise; - - // If the rel stdev of the last N cuda noise measurements is less than - // 5%, consider the result stable. 
- const auto noise_threshold = 0.05; - if (noise_rel_stdev < noise_threshold) - { - return true; - } - } } // Check for timeouts: @@ -206,14 +170,21 @@ void measure_cold_base::generate_summaries() summ.set_float64("value", avg_cuda_time); } + const auto mean_cuda_time = m_total_cuda_time / static_cast(m_total_samples); + const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(), + m_cuda_times.cend(), + mean_cuda_time); + const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; + const auto noise = cuda_rel_stdev; + const auto max_noise = m_criterion_params.get_float64("max-noise"); + const auto min_time = m_criterion_params.get_float64("min-time"); + { auto &summ = m_state.add_summary("nv/cold/time/gpu/stdev/relative"); summ.set_string("name", "Noise"); summ.set_string("hint", "percentage"); summ.set_string("description", "Relative standard deviation of isolated GPU times"); - summ.set_float64("value", - m_noise_tracker.empty() ? std::numeric_limits::infinity() - : m_noise_tracker.back()); + summ.set_float64("value", noise); } if (const auto items = m_state.get_element_count(); items != 0) @@ -270,15 +241,15 @@ void measure_cold_base::generate_summaries() { const auto timeout = m_walltime_timer.get_duration(); - if (!m_noise_tracker.empty() && m_noise_tracker.back() > m_max_noise) + if (noise > max_noise) { printer.log(nvbench::log_level::warn, fmt::format("Current measurement timed out ({:0.2f}s) " "while over noise threshold ({:0.2f}% > " "{:0.2f}%)", timeout, - m_noise_tracker.back() * 100, - m_max_noise * 100)); + noise * 100, + max_noise * 100)); } if (m_total_samples < m_min_samples) { @@ -289,7 +260,7 @@ void measure_cold_base::generate_summaries() m_total_samples, m_min_samples)); } - if (m_total_cuda_time < m_min_time) + if (m_total_cuda_time < min_time) { printer.log(nvbench::log_level::warn, fmt::format("Current measurement timed out ({:0.2f}s) " @@ -297,7 +268,7 @@ void measure_cold_base::generate_summaries() 
"{:0.2f}s)", timeout, m_total_cuda_time, - m_min_time)); + min_time)); } } diff --git a/nvbench/detail/measure_cold.cuh b/nvbench/detail/measure_cold.cuh index a5c2604..2b0183f 100644 --- a/nvbench/detail/measure_cold.cuh +++ b/nvbench/detail/measure_cold.cuh @@ -25,15 +25,14 @@ #include #include #include +#include #include #include -#include #include #include -#include #include #include @@ -87,12 +86,13 @@ protected: nvbench::detail::l2flush m_l2flush; nvbench::blocking_kernel m_blocker; + nvbench::criterion_params m_criterion_params; + nvbench::stopping_criterion_base& m_stopping_criterion; + bool m_run_once{false}; bool m_no_block{false}; nvbench::int64_t m_min_samples{}; - nvbench::float64_t m_max_noise{}; // rel stdev - nvbench::float64_t m_min_time{}; nvbench::float64_t m_skip_time{}; nvbench::float64_t m_timeout{}; @@ -102,9 +102,6 @@ protected: nvbench::float64_t m_total_cpu_time{}; nvbench::float64_t m_cpu_noise{}; // rel stdev - // Trailing history of noise measurements for convergence tests - nvbench::detail::ring_buffer m_noise_tracker{512}; - std::vector m_cuda_times; std::vector m_cpu_times; diff --git a/nvbench/detail/ring_buffer.cuh b/nvbench/detail/ring_buffer.cuh index 77d652a..5c00b24 100644 --- a/nvbench/detail/ring_buffer.cuh +++ b/nvbench/detail/ring_buffer.cuh @@ -22,12 +22,91 @@ #include +#include +#include #include #include namespace nvbench::detail { +template +class ring_buffer_iterator +{ + std::ptrdiff_t m_index; + std::ptrdiff_t m_capacity; + T *m_ptr; + +public: + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T *; + using reference = T &; + + ring_buffer_iterator(std::ptrdiff_t index, std::ptrdiff_t capacity, pointer ptr) + : m_index{index} + , m_capacity{capacity} + , m_ptr{ptr} + {} + + ring_buffer_iterator operator++() + { + ++m_index; + return *this; + } + + ring_buffer_iterator operator++(int) + { + ring_buffer_iterator temp = *this; 
+ ++(*this); + return temp; + } + + ring_buffer_iterator &operator--() + { + --m_index; + return *this; + } + + ring_buffer_iterator operator--(int) + { + ring_buffer_iterator temp = *this; + --(*this); + return temp; + } + + ring_buffer_iterator operator+(difference_type n) const + { + return ring_buffer_iterator(m_index + n, m_capacity, m_ptr); + } + + ring_buffer_iterator operator-(difference_type n) const + { + return ring_buffer_iterator(m_index - n, m_capacity, m_ptr); + } + + difference_type operator-(const ring_buffer_iterator &other) const + { + return m_index - other.m_index; + } + + reference operator*() const { return m_ptr[m_index % m_capacity]; } + pointer operator->() const { return &(operator*()); } + + reference operator[](difference_type n) const { return *(*this + n); } + + bool operator==(const ring_buffer_iterator &other) const + { + return m_ptr == other.m_ptr && m_index == other.m_index; + } + bool operator!=(const ring_buffer_iterator &other) const { return !(*this == other); } + bool operator<(const ring_buffer_iterator &other) const { return m_index < other.m_index; } + bool operator>(const ring_buffer_iterator &other) const { return m_index > other.m_index; } + bool operator<=(const ring_buffer_iterator &other) const { return !(*this > other); } + bool operator>=(const ring_buffer_iterator &other) const { return !(*this < other); } +}; + /** * @brief A simple, dynamically sized ring buffer. */ @@ -42,7 +121,13 @@ private: std::size_t m_index{0}; bool m_full{false}; + std::size_t get_front_index() const + { + return m_full ? m_index : 0; + } + public: + /** * Create a new ring buffer with the requested capacity. */ @@ -51,17 +136,48 @@ public: {} /** - * Iterators provide all values in the ring buffer in unspecified order. + * Iterators provide all values in the ring buffer in FIFO order. 
* @{ */ - // clang-format off - [[nodiscard]] auto begin() { return m_buffer.begin(); } - [[nodiscard]] auto begin() const { return m_buffer.begin(); } - [[nodiscard]] auto cbegin() const { return m_buffer.cbegin(); } - [[nodiscard]] auto end() { return m_buffer.begin() + static_cast(this->size()); } - [[nodiscard]] auto end() const { return m_buffer.begin() + static_cast(this->size()); } - [[nodiscard]] auto cend() const { return m_buffer.cbegin() + static_cast(this->size()); } - // clang-format on + [[nodiscard]] ring_buffer_iterator begin() + { + return {static_cast(get_front_index()), + static_cast(capacity()), + m_buffer.data()}; + } + + [[nodiscard]] ring_buffer_iterator end() + { + return {static_cast(get_front_index() + size()), + static_cast(capacity()), + m_buffer.data()}; + } + [[nodiscard]] ring_buffer_iterator begin() const + { + return {static_cast(get_front_index()), + static_cast(capacity()), + m_buffer.data()}; + } + + [[nodiscard]] ring_buffer_iterator end() const + { + return {static_cast(get_front_index() + size()), + static_cast(capacity()), + m_buffer.data()}; + } + [[nodiscard]] ring_buffer_iterator cbegin() const + { + return {static_cast(get_front_index()), + static_cast(capacity()), + m_buffer.data()}; + } + + [[nodiscard]] ring_buffer_iterator cend() const + { + return {static_cast(get_front_index() + size()), + static_cast(capacity()), + m_buffer.data()}; + } /** @} */ /** diff --git a/nvbench/detail/statistics.cuh b/nvbench/detail/statistics.cuh index ddafc43..311a20d 100644 --- a/nvbench/detail/statistics.cuh +++ b/nvbench/detail/statistics.cuh @@ -18,14 +18,15 @@ #pragma once -#include - #include +#include #include #include #include #include +#include + #include namespace nvbench::detail::statistics @@ -41,7 +42,8 @@ ValueType standard_deviation(Iter first, Iter last, ValueType mean) { static_assert(std::is_floating_point_v); - const auto num = last - first; + const auto num = std::distance(first, last); + if (num < 5) // don't bother 
with low sample sizes. { return std::numeric_limits::infinity(); @@ -56,8 +58,135 @@ ValueType standard_deviation(Iter first, Iter last, ValueType mean) val *= val; return val; }) / - static_cast((num - 1)); + static_cast((num - 1)); // Bessel’s correction return std::sqrt(variance); } +/** + * Computes and returns the mean. + * + * If the input has fewer than 1 sample, infinity is returned. + */ +template +nvbench::float64_t compute_mean(It first, It last) +{ + const auto num = std::distance(first, last); + + if (num < 1) + { + return std::numeric_limits::infinity(); + } + + return std::accumulate(first, last, 0.0) / static_cast(num); +} + +/** + * Computes linear regression and returns the slope and intercept + * + * This version takes precomputed mean of [first, last). + * If the input has fewer than 2 samples, infinity is returned for both slope and intercept. + */ +template +std::pair +compute_linear_regression(It first, It last, nvbench::float64_t mean_y) +{ + const std::size_t n = static_cast(std::distance(first, last)); + + if (n < 2) + { + return std::make_pair(std::numeric_limits::infinity(), + std::numeric_limits::infinity()); + } + + // Assuming x starts from 0 + const nvbench::float64_t mean_x = (static_cast(n) - 1.0) / 2.0; + + // Calculate the numerator and denominator for the slope + nvbench::float64_t numerator = 0.0; + nvbench::float64_t denominator = 0.0; + + for (std::size_t i = 0; i < n; ++i, ++first) + { + const nvbench::float64_t x_diff = static_cast(i) - mean_x; + numerator += x_diff * (*first - mean_y); + denominator += x_diff * x_diff; + } + + // Calculate the slope and intercept + const nvbench::float64_t slope = numerator / denominator; + const nvbench::float64_t intercept = mean_y - slope * mean_x; + + return std::make_pair(slope, intercept); +} + +/** + * Computes linear regression and returns the slope and intercept + * + * If the input has fewer than 2 samples, infinity is returned for both slope and intercept. 
+ */ +template +std::pair compute_linear_regression(It first, It last) +{ + return compute_linear_regression(first, last, compute_mean(first, last)); +} + +/** + * Computes and returns the R^2 (coefficient of determination) + * + * This version takes precomputed mean of [first, last). + */ +template +nvbench::float64_t compute_r2(It first, + It last, + nvbench::float64_t mean_y, + nvbench::float64_t slope, + nvbench::float64_t intercept) +{ + const std::size_t n = static_cast(std::distance(first, last)); + + nvbench::float64_t ss_tot = 0.0; + nvbench::float64_t ss_res = 0.0; + + for (std::size_t i = 0; i < n; ++i, ++first) + { + const nvbench::float64_t y = *first; + const nvbench::float64_t y_pred = slope * static_cast(i) + intercept; + + ss_tot += (y - mean_y) * (y - mean_y); + ss_res += (y - y_pred) * (y - y_pred); + } + + if (ss_tot == 0.0) + { + return 1.0; + } + + return 1.0 - ss_res / ss_tot; +} + +/** + * Computes and returns the R^2 (coefficient of determination) + */ +template +nvbench::float64_t +compute_r2(It first, It last, nvbench::float64_t slope, nvbench::float64_t intercept) +{ + return compute_r2(first, last, compute_mean(first, last), slope, intercept); +} + +inline nvbench::float64_t rad2deg(nvbench::float64_t rad) +{ + return rad * 180.0 / M_PI; +} + +inline nvbench::float64_t slope2rad(nvbench::float64_t slope) +{ + return std::atan2(slope, 1.0); +} + +inline nvbench::float64_t slope2deg(nvbench::float64_t slope) +{ + return rad2deg(slope2rad(slope)); +} + } // namespace nvbench::detail::statistics diff --git a/nvbench/detail/stdrel_criterion.cuh b/nvbench/detail/stdrel_criterion.cuh new file mode 100644 index 0000000..5f87e84 --- /dev/null +++ b/nvbench/detail/stdrel_criterion.cuh @@ -0,0 +1,47 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. 
+ * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +namespace nvbench::detail +{ + +class stdrel_criterion final : public stopping_criterion_base +{ + // state + nvbench::int64_t m_total_samples{}; + nvbench::float64_t m_total_cuda_time{}; + std::vector m_cuda_times{}; + nvbench::detail::ring_buffer m_noise_tracker{512}; + +public: + stdrel_criterion(); + +protected: + virtual void do_initialize() override; + virtual void do_add_measurement(nvbench::float64_t measurement) override; + virtual bool do_is_finished() override; +}; + +} // namespace nvbench::detail diff --git a/nvbench/detail/stdrel_criterion.cxx b/nvbench/detail/stdrel_criterion.cxx new file mode 100644 index 0000000..a6c5ea8 --- /dev/null +++ b/nvbench/detail/stdrel_criterion.cxx @@ -0,0 +1,98 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace nvbench::detail +{ + +stdrel_criterion::stdrel_criterion() + : stopping_criterion_base{"stdrel", + {{"max-noise", nvbench::detail::compat_max_noise()}, + {"min-time", nvbench::detail::compat_min_time()}}} +{} + +void stdrel_criterion::do_initialize() +{ + m_total_samples = 0; + m_total_cuda_time = 0.0; + m_cuda_times.clear(); + m_noise_tracker.clear(); +} + +void stdrel_criterion::do_add_measurement(nvbench::float64_t measurement) +{ + m_total_samples++; + m_total_cuda_time += measurement; + m_cuda_times.push_back(measurement); + + // Compute convergence statistics using CUDA timings: + const auto mean_cuda_time = m_total_cuda_time / static_cast(m_total_samples); + const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(), + m_cuda_times.cend(), + mean_cuda_time); + const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; + if (std::isfinite(cuda_rel_stdev)) + { + m_noise_tracker.push_back(cuda_rel_stdev); + } +} + +bool stdrel_criterion::do_is_finished() +{ + if (m_total_cuda_time <= m_params.get_float64("min-time")) + { + return false; + } + + // Noise has dropped below threshold + if (m_noise_tracker.back() < m_params.get_float64("max-noise")) + { + return true; + } + + // Check if the noise (cuda rel stdev) has converged by inspecting a + // trailing window of recorded noise measurements. + // This helps identify benchmarks that are inherently noisy and would + // never converge to the target stdev threshold. This check ensures that the + // benchmark will end if the stdev stabilizes above the target threshold. + // Gather some iterations before checking noise, and limit how often we + // check this. + if (m_noise_tracker.size() > 64 && (m_total_samples % 16 == 0)) + { + // Use the current noise as the stdev reference. 
+ const auto current_noise = m_noise_tracker.back(); + const auto noise_stdev = + nvbench::detail::statistics::standard_deviation(m_noise_tracker.cbegin(), + m_noise_tracker.cend(), + current_noise); + const auto noise_rel_stdev = noise_stdev / current_noise; + + // If the rel stdev of the last N cuda noise measurements is less than + // 5%, consider the result stable. + const auto noise_threshold = 0.05; + if (noise_rel_stdev < noise_threshold) + { + return true; + } + } + + return false; +} + +} // namespace nvbench::detail diff --git a/nvbench/nvbench.cuh b/nvbench/nvbench.cuh index 75bf1c1..3fb933f 100644 --- a/nvbench/nvbench.cuh +++ b/nvbench/nvbench.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/nvbench/option_parser.cu b/nvbench/option_parser.cu index a8db8ef..6dba745 100644 --- a/nvbench/option_parser.cu +++ b/nvbench/option_parser.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -376,6 +377,9 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, } }; + const nvbench::criterion_manager::params_description criterion_params = + nvbench::criterion_manager::get().get_params_description(); + while (first < last) { const auto &arg = *first; @@ -433,6 +437,12 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, this->enable_run_once(); first += 1; } + else if (arg == "--stopping-criterion") + { + check_params(1); + this->set_stopping_criterion(first[1]); + first += 2; + } else if (arg == "--disable-blocking-kernel") { this->disable_blocking_kernel(); @@ -504,16 +514,34 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, this->update_int64_prop(first[0], first[1]); first += 2; } - else if (arg == "--min-time" || arg == "--max-noise" || arg == "--skip-time" || - arg == "--timeout") + else if (arg == "--skip-time" || arg == "--timeout") { check_params(1); this->update_float64_prop(first[0], first[1]); first += 2; } else 
- { - NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg); + { // Try criterion params + if (arg.size() < 3 || arg[0] != '-' || arg[1] != '-') + { + NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg); + } + + std::string_view name(arg.c_str() + 2, arg.size() - 2); + auto it = std::find_if(criterion_params.begin(), + criterion_params.end(), + [&name](const auto &param) { return param.first == name; }); + + if (it != criterion_params.end()) + { + check_params(1); + this->update_criterion_prop(first[0], first[1], it->second); + first += 2; + } + else + { + NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg); + } } } } @@ -698,6 +726,20 @@ void option_parser::enable_run_once() bench.set_run_once(true); } +void option_parser::set_stopping_criterion(const std::string &criterion) +{ + // If no active benchmark, save args as global. + if (m_benchmarks.empty()) + { + m_global_benchmark_args.push_back("--stopping-criterion"); + m_global_benchmark_args.push_back(criterion); + return; + } + + benchmark_base &bench = *m_benchmarks.back(); + bench.set_stopping_criterion(criterion); +} + void option_parser::disable_blocking_kernel() { // If no active benchmark, save args as global. @@ -933,6 +975,58 @@ catch (std::exception &e) e.what()); } +void option_parser::update_criterion_prop( + const std::string &prop_arg, + const std::string &prop_val, + const nvbench::named_values::type type) +try +{ + // If no active benchmark, save args as global.
+ if (m_benchmarks.empty()) + { + m_global_benchmark_args.push_back(prop_arg); + m_global_benchmark_args.push_back(prop_val); + return; + } + + benchmark_base &bench = *m_benchmarks.back(); + nvbench::criterion_params& criterion_params = bench.get_criterion_params(); + std::string name(prop_arg.begin() + 2, prop_arg.end()); + if (type == nvbench::named_values::type::float64) + { + nvbench::float64_t value{}; + ::parse(prop_val, value); + + if (prop_arg == "--max-noise") + { // Specified as percentage, stored as ratio: + value /= 100.0; + } + criterion_params.set_float64(name, value); + } + else if (type == nvbench::named_values::type::int64) + { + nvbench::int64_t value{}; + ::parse(prop_val, value); + criterion_params.set_int64(name, value); + } + else if (type == nvbench::named_values::type::string) + { + criterion_params.set_string(name, prop_val); + } + else + { + NVBENCH_THROW(std::runtime_error, "Unrecognized property: `{}`", prop_arg); + } +} +catch (std::exception& e) +{ + NVBENCH_THROW(std::runtime_error, + "Error handling option `{} {}`:\n{}", + prop_arg, + prop_val, + e.what()); +} + void option_parser::update_float64_prop(const std::string &prop_arg, const std::string &prop_val) try { @@ -948,15 +1042,7 @@ try nvbench::float64_t value{}; ::parse(prop_val, value); - if (prop_arg == "--min-time") - { - bench.set_min_time(value); - } - else if (prop_arg == "--max-noise") - { // Specified as percentage, stored as ratio: - bench.set_max_noise(value / 100.); - } - else if (prop_arg == "--skip-time") + if (prop_arg == "--skip-time") { bench.set_skip_time(value); } diff --git a/nvbench/option_parser.cuh b/nvbench/option_parser.cuh index 1f334d6..5bd834c 100644 --- a/nvbench/option_parser.cuh +++ b/nvbench/option_parser.cuh @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -86,6 +87,7 @@ private: void set_persistence_mode(const std::string &state); void lock_gpu_clocks(const std::string &rate); + void set_stopping_criterion(const std::string 
&criterion); void enable_run_once(); void disable_blocking_kernel(); @@ -111,6 +113,10 @@ private: void update_int64_prop(const std::string &prop_arg, const std::string &prop_val); void update_float64_prop(const std::string &prop_arg, const std::string &prop_val); + void update_criterion_prop(const std::string &prop_arg, + const std::string &prop_val, + const nvbench::named_values::type type); + void update_used_device_state() const; // Command line args diff --git a/nvbench/state.cuh b/nvbench/state.cuh index 53c7413..09795de 100644 --- a/nvbench/state.cuh +++ b/nvbench/state.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -122,6 +123,17 @@ struct state void set_min_samples(nvbench::int64_t min_samples) { m_min_samples = min_samples; } /// @} + [[nodiscard]] const nvbench::criterion_params &get_criterion_params() const + { + return m_criterion_params; + } + + /// Control the stopping criterion for the measurement loop. + /// @{ + [[nodiscard]] const std::string& get_stopping_criterion() const { return m_stopping_criterion; } + void set_stopping_criterion(std::string criterion) { m_stopping_criterion = std::move(criterion); } + /// @} + /// If true, the benchmark is only run once, skipping all warmup runs and only /// executing a single non-batched measurement. This is intended for use with /// external profiling tools. @{ @@ -135,16 +147,30 @@ struct state void set_disable_blocking_kernel(bool v) { m_disable_blocking_kernel = v; } /// @} - /// Accumulate at least this many seconds of timing data per measurement. @{ - [[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; } - void set_min_time(nvbench::float64_t min_time) { m_min_time = min_time; } + /// Accumulate at least this many seconds of timing data per measurement. + /// Only applies to `stdrel` stopping criterion. 
@{ + [[nodiscard]] nvbench::float64_t get_min_time() const + { + return m_criterion_params.get_float64("min-time"); + } + void set_min_time(nvbench::float64_t min_time) + { + m_criterion_params.set_float64("min-time", min_time); + } /// @} /// Specify the maximum amount of noise if a measurement supports noise. /// Noise is the relative standard deviation: - /// `noise = stdev / mean_time`. @{ - [[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; } - void set_max_noise(nvbench::float64_t max_noise) { m_max_noise = max_noise; } + /// `noise = stdev / mean_time`. + /// Only applies to `stdrel` stopping criterion. @{ + [[nodiscard]] nvbench::float64_t get_max_noise() const + { + return m_criterion_params.get_float64("max-noise"); + } + void set_max_noise(nvbench::float64_t max_noise) + { + m_criterion_params.set_float64("max-noise", max_noise); + } /// @} /// If a warmup run finishes in less than `skip_time`, the measurement will @@ -270,9 +296,11 @@ private: bool m_run_once{false}; bool m_disable_blocking_kernel{false}; + + nvbench::criterion_params m_criterion_params; + std::string m_stopping_criterion; + nvbench::int64_t m_min_samples; - nvbench::float64_t m_min_time; - nvbench::float64_t m_max_noise; nvbench::float64_t m_skip_time; nvbench::float64_t m_timeout; diff --git a/nvbench/state.cxx b/nvbench/state.cxx index 3cf105c..1be48c5 100644 --- a/nvbench/state.cxx +++ b/nvbench/state.cxx @@ -36,9 +36,9 @@ state::state(const benchmark_base &bench) : m_benchmark{bench} , m_run_once{bench.get_run_once()} , m_disable_blocking_kernel{bench.get_disable_blocking_kernel()} + , m_criterion_params{bench.get_criterion_params()} + , m_stopping_criterion(bench.get_stopping_criterion()) , m_min_samples{bench.get_min_samples()} - , m_min_time{bench.get_min_time()} - , m_max_noise{bench.get_max_noise()} , m_skip_time{bench.get_skip_time()} , m_timeout{bench.get_timeout()} {} @@ -53,9 +53,9 @@ state::state(const benchmark_base &bench, , 
m_type_config_index{type_config_index} , m_run_once{bench.get_run_once()} , m_disable_blocking_kernel{bench.get_disable_blocking_kernel()} + , m_criterion_params{bench.get_criterion_params()} + , m_stopping_criterion(bench.get_stopping_criterion()) , m_min_samples{bench.get_min_samples()} - , m_min_time{bench.get_min_time()} - , m_max_noise{bench.get_max_noise()} , m_skip_time{bench.get_skip_time()} , m_timeout{bench.get_timeout()} {} diff --git a/nvbench/stopping_criterion.cuh b/nvbench/stopping_criterion.cuh new file mode 100644 index 0000000..36fb6eb --- /dev/null +++ b/nvbench/stopping_criterion.cuh @@ -0,0 +1,136 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include + +#include +#include + +namespace nvbench +{ + +namespace detail +{ + +constexpr nvbench::float64_t compat_min_time() { return 0.5; } // 0.5 seconds +constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relative standard deviation + +} // namespace detail + +/** + * Stores all the parameters for stopping criterion in use + */ +class criterion_params +{ + nvbench::named_values m_named_values; +public: + criterion_params(); + criterion_params(std::initializer_list>); + + /** + * Set parameter values from another criterion_params object if they exist + * + * Parameters in `other` that do not correspond to parameters in `this` are ignored. 
+ */ + void set_from(const criterion_params &other); + + void set_int64(std::string name, nvbench::int64_t value); + void set_float64(std::string name, nvbench::float64_t value); + void set_string(std::string name, std::string value); + + [[nodiscard]] std::vector get_names() const; + [[nodiscard]] nvbench::named_values::type get_type(const std::string &name) const; + + [[nodiscard]] bool has_value(const std::string &name) const; + [[nodiscard]] nvbench::int64_t get_int64(const std::string &name) const; + [[nodiscard]] nvbench::float64_t get_float64(const std::string &name) const; + [[nodiscard]] std::string get_string(const std::string &name) const; +}; + +/** + * Stopping criterion interface + */ +class stopping_criterion_base +{ +protected: + std::string m_name; + criterion_params m_params; + +public: + /** + * @param name Unique name of the criterion + * @param params Default values for all parameters of the criterion + */ + explicit stopping_criterion_base(std::string name, criterion_params params) + : m_name{std::move(name)} + , m_params{std::move(params)} + {} + + [[nodiscard]] const std::string &get_name() const { return m_name; } + [[nodiscard]] const criterion_params &get_params() const { return m_params; } + + /** + * Initialize the criterion with the given parameters + * + * This method is called once per benchmark run, before any measurements are provided. 
+ */ + void initialize(const criterion_params &params) + { + m_params.set_from(params); + this->do_initialize(); + } + + /** + * Add the latest measurement to the criterion + */ + void add_measurement(nvbench::float64_t measurement) + { + this->do_add_measurement(measurement); + } + + /** + * Check if the criterion has been met for all measurements processed by `add_measurement` + */ + bool is_finished() + { + return this->do_is_finished(); + } + +protected: + /** + * Initialize the criterion after updating the parameters + */ + virtual void do_initialize() = 0; + + /** + * Add the latest measurement to the criterion + */ + virtual void do_add_measurement(nvbench::float64_t measurement) = 0; + + /** + * Check if the criterion has been met for all measurements processed by `add_measurement` + */ + virtual bool do_is_finished() = 0; +}; + +} // namespace nvbench diff --git a/nvbench/stopping_criterion.cxx b/nvbench/stopping_criterion.cxx new file mode 100644 index 0000000..976a1a7 --- /dev/null +++ b/nvbench/stopping_criterion.cxx @@ -0,0 +1,124 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include + +#include + + +namespace nvbench +{ + +// Default constructor for compatibility with old code +criterion_params::criterion_params() + : criterion_params{{"max-noise", nvbench::detail::compat_max_noise()}, + {"min-time", nvbench::detail::compat_min_time()}} +{} + +criterion_params::criterion_params( + std::initializer_list> list) +{ + for (const auto &[name, value] : list) + { + m_named_values.set_value(name, value); + } +} + +void criterion_params::set_from(const criterion_params &other) +{ + for (const std::string &name : this->get_names()) + { + if (other.has_value(name)) + { + if (this->get_type(name) != other.get_type(name)) + { + NVBENCH_THROW(std::runtime_error, + "Mismatched types for named value \"{}\". " + "Expected {}, got {}.", + name, + static_cast(this->get_type(name)), + static_cast(other.get_type(name))); + } + m_named_values.remove_value(name); + m_named_values.set_value(name, other.m_named_values.get_value(name)); + } + } +} + +void criterion_params::set_int64(std::string name, nvbench::int64_t value) +{ + if (m_named_values.has_value(name)) + { + m_named_values.remove_value(name); + } + + m_named_values.set_int64(name, value); +} + +void criterion_params::set_float64(std::string name, nvbench::float64_t value) +{ + if (m_named_values.has_value(name)) + { + m_named_values.remove_value(name); + } + + m_named_values.set_float64(name, value); +} + +void criterion_params::set_string(std::string name, std::string value) +{ + if (m_named_values.has_value(name)) + { + m_named_values.remove_value(name); + } + + m_named_values.set_string(name, std::move(value)); +} + +bool criterion_params::has_value(const std::string &name) const +{ + return m_named_values.has_value(name); +} + +nvbench::int64_t criterion_params::get_int64(const std::string &name) const +{ + return m_named_values.get_int64(name); +} + +nvbench::float64_t criterion_params::get_float64(const std::string &name) const +{ + return m_named_values.get_float64(name); +} + 
+std::string criterion_params::get_string(const std::string &name) const +{ + return m_named_values.get_string(name); +} + +std::vector criterion_params::get_names() const +{ + return m_named_values.get_names(); +} + +nvbench::named_values::type criterion_params::get_type(const std::string &name) const +{ + return m_named_values.get_type(name); +} + + +} // namespace nvbench diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 4928ebc..1535878 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -4,7 +4,10 @@ set(test_srcs create.cu cuda_timer.cu cpu_timer.cu + criterion_manager.cu + criterion_params.cu enum_type_list.cu + entropy_criterion.cu float64_axis.cu int64_axis.cu named_values.cu @@ -13,7 +16,9 @@ set(test_srcs ring_buffer.cu runner.cu state.cu + statistics.cu state_generator.cu + stdrel_criterion.cu string_axis.cu type_axis.cu type_list.cu diff --git a/testing/criterion_manager.cu b/testing/criterion_manager.cu new file mode 100644 index 0000000..841cd8c --- /dev/null +++ b/testing/criterion_manager.cu @@ -0,0 +1,76 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include +#include + +#include "test_asserts.cuh" + +void test_standard_criteria_exist() +{ + ASSERT(nvbench::criterion_manager::get().get_criterion("stdrel").get_name() == "stdrel"); + ASSERT(nvbench::criterion_manager::get().get_criterion("entropy").get_name() == "entropy"); +} + +class custom_criterion : public nvbench::stopping_criterion_base +{ +public: + custom_criterion() + : nvbench::stopping_criterion_base("custom", nvbench::criterion_params{}) + {} + +protected: + virtual void do_initialize() override {} + virtual void do_add_measurement(nvbench::float64_t /* measurement */) override {} + virtual bool do_is_finished() override { return true; } +}; + +void test_no_duplicates_are_allowed() +{ + nvbench::criterion_manager& manager = nvbench::criterion_manager::get(); + bool exception_triggered = false; + + try { + nvbench::stopping_criterion_base& custom = manager.get_criterion("custom"); + } catch(...) { + exception_triggered = true; + } + ASSERT(exception_triggered); + + std::unique_ptr custom_ptr = std::make_unique(); + custom_criterion* custom_raw = custom_ptr.get(); + ASSERT(&manager.add(std::move(custom_ptr)) == custom_raw); + + nvbench::stopping_criterion_base& custom = nvbench::criterion_manager::get().get_criterion("custom"); + ASSERT(custom_raw == &custom); + + exception_triggered = false; + try { + manager.add(std::make_unique()); + } catch(...) { + exception_triggered = true; + } + ASSERT(exception_triggered); +} + +int main() +{ + test_standard_criteria_exist(); + test_no_duplicates_are_allowed(); +} + diff --git a/testing/criterion_params.cu b/testing/criterion_params.cu new file mode 100644 index 0000000..4eceefa --- /dev/null +++ b/testing/criterion_params.cu @@ -0,0 +1,63 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. 
+ * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "test_asserts.cuh" + +void test_compat_parameters() +{ + nvbench::criterion_params params; + + ASSERT(params.has_value("max-noise")); + ASSERT(params.has_value("min-time")); + + ASSERT(params.get_float64("max-noise") == nvbench::detail::compat_max_noise()); + ASSERT(params.get_float64("min-time") == nvbench::detail::compat_min_time()); +} + +void test_compat_overwrite() +{ + nvbench::criterion_params params; + params.set_float64("max-noise", 40000.0); + params.set_float64("min-time", 42000.0); + + ASSERT(params.get_float64("max-noise") == 40000.0); + ASSERT(params.get_float64("min-time") == 42000.0); +} + +void test_overwrite() +{ + nvbench::criterion_params params; + ASSERT(!params.has_value("custom")); + + params.set_float64("custom", 42.0); + ASSERT(params.get_float64("custom") == 42.0); + + params.set_float64("custom", 4.2); + ASSERT(params.get_float64("custom") == 4.2); +} + +int main() +{ + test_compat_parameters(); + test_compat_overwrite(); + test_overwrite(); +} + diff --git a/testing/entropy_criterion.cu b/testing/entropy_criterion.cu new file mode 100644 index 0000000..df489c9 --- /dev/null +++ b/testing/entropy_criterion.cu @@ -0,0 +1,91 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. 
+ * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include "test_asserts.cuh" + +#include +#include +#include + +void test_const() +{ + nvbench::criterion_params params; + nvbench::detail::entropy_criterion criterion; + + criterion.initialize(params); + for (int i = 0; i < 6; i++) + { // nvbench wants at least 5 to compute the standard deviation + criterion.add_measurement(42.0); + } + ASSERT(criterion.is_finished()); +} + +void produce_entropy_arch(nvbench::detail::entropy_criterion &criterion) +{ + /* + * This pattern is designed to simulate the entropy: + * + * 0.0, 1.0, 1.5, 2.0, 2.3, 2.5 <---- no unexpected measurement after this point + * 2.5, 2.4, 2.2, 2.1, 2.0, 1.9 <-+ + * 1.8, 1.7, 1.6, 1.6, 1.5, 1.4 | + * 1.4, 1.3, 1.3, 1.3, 1.2, 1.2 | + * 1.1, 1.1, 1.1, 1.0, 1.0, 1.0 +-- entropy only decreases after 5-th sample, + * 1.0, 0.9, 0.9, 0.9, 0.9, 0.9 | so the slope should be negative + * 0.8, 0.8, 0.8, 0.8, 0.8, 0.8 | + * 0.7, 0.7, 0.7, 0.7, 0.7, 0.7 <-+ + */ + for (nvbench::float64_t x = 0.0; x < 50.0; x += 1.0) + { + criterion.add_measurement(x > 5.0 ? 
5.0 : x); + } +} + +void test_entropy_arch() +{ + nvbench::detail::entropy_criterion criterion; + + // The R2 should be around 0.5 + // The angle should be around -1.83 + nvbench::criterion_params params; + params.set_float64("min-r2", 0.3); + params.set_float64("max-angle", -1.0); + criterion.initialize(params); + produce_entropy_arch(criterion); + ASSERT(criterion.is_finished()); + + params.set_float64("min-r2", 0.7); + criterion.initialize(params); + produce_entropy_arch(criterion); + ASSERT(!criterion.is_finished()); + + params.set_float64("min-r2", 0.3); + params.set_float64("max-angle", -2.0); + criterion.initialize(params); + produce_entropy_arch(criterion); + ASSERT(!criterion.is_finished()); +} + +int main() +{ + test_const(); + test_entropy_arch(); +} diff --git a/testing/option_parser.cu b/testing/option_parser.cu index 064d086..167e833 100644 --- a/testing/option_parser.cu +++ b/testing/option_parser.cu @@ -1229,6 +1229,27 @@ void test_timeout() ASSERT(std::abs(states[0].get_timeout() - 12345e2) < 1.); } +void test_stopping_criterion() +{ + nvbench::option_parser parser; + parser.parse( + {"--benchmark", "DummyBench", + "--stopping-criterion", "entropy", + "--max-angle", "0.42", + "--min-r2", "0.6"}); + const auto& states = parser_to_states(parser); + + ASSERT(states.size() == 1); + ASSERT(states[0].get_stopping_criterion() == "entropy"); + + const nvbench::criterion_params &criterion_params = states[0].get_criterion_params(); + ASSERT(criterion_params.has_value("max-angle")); + ASSERT(criterion_params.has_value("min-r2")); + + ASSERT(criterion_params.get_float64("max-angle") == 0.42); + ASSERT(criterion_params.get_float64("min-r2") == 0.6); +} + int main() try { @@ -1265,6 +1286,8 @@ try test_skip_time(); test_timeout(); + test_stopping_criterion(); + return 0; } catch (std::exception &err) diff --git a/testing/ring_buffer.cu b/testing/ring_buffer.cu index 4e13805..5af5343 100644 --- a/testing/ring_buffer.cu +++ b/testing/ring_buffer.cu @@ -27,7 +27,7 
@@ template bool equal(const nvbench::detail::ring_buffer &buffer, const std::vector &reference) { - return std::equal(buffer.cbegin(), buffer.cend(), reference.cbegin()); + return std::equal(buffer.begin(), buffer.end(), reference.begin()); } int main() @@ -62,12 +62,12 @@ try ASSERT(avg.size() == 3); ASSERT(avg.capacity() == 3); ASSERT_MSG(avg.back() == 5, " (got {})", avg.back()); - ASSERT(equal(avg, {5, 2, -15})); + ASSERT(equal(avg, {2, -15, 5})); avg.push_back(0); ASSERT(avg.size() == 3); ASSERT(avg.capacity() == 3); - ASSERT(equal(avg, {5, 0, -15})); + ASSERT(equal(avg, {-15, 5, 0})); ASSERT_MSG(avg.back() == 0, " (got {})", avg.back()); avg.push_back(128); diff --git a/testing/statistics.cu b/testing/statistics.cu new file mode 100644 index 0000000..50e1014 --- /dev/null +++ b/testing/statistics.cu @@ -0,0 +1,128 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include // NOTE(review): header names are missing in this copy of the patch - restore before applying
+#include
+
+#include "test_asserts.cuh"
+
+#include
+
+namespace statistics = nvbench::detail::statistics;
+
+void test_mean()
+{ // Checks compute_mean on a small range and on an empty range
+  {
+    std::vector data{1.0, 2.0, 3.0, 4.0, 5.0};
+    const nvbench::float64_t actual = statistics::compute_mean(std::begin(data), std::end(data));
+    const nvbench::float64_t expected = 3.0;
+    ASSERT(std::abs(actual - expected) < 0.001);
+  }
+
+  { // An empty range has no mean; the result is required to be non-finite
+    std::vector data; // NOTE(review): element type is not spelled out in this copy - confirm against the original
+    const bool finite = std::isfinite(statistics::compute_mean(std::begin(data), std::end(data)));
+    ASSERT(!finite);
+  }
+}
+
+void test_std()
+{ // Checks standard_deviation against a hand-computed value for 1..5
+  std::vector data{1.0, 2.0, 3.0, 4.0, 5.0};
+  const nvbench::float64_t mean = 3.0;
+  const nvbench::float64_t actual = statistics::standard_deviation(std::begin(data), std::end(data), mean);
+  const nvbench::float64_t expected = 1.581; // sqrt(2.5), i.e. the sample (n - 1) standard deviation
+  ASSERT(std::abs(actual - expected) < 0.001);
+}
+
+void test_lin_regression()
+{ // The expected values below imply that x is the sample index 0, 1, 2, ...
+  {
+    std::vector ys{1.0, 2.0, 3.0, 4.0, 5.0};
+    auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
+    ASSERT(slope == 1.0);
+    ASSERT(intercept == 1.0);
+  }
+  { // Constant input: flat line through the constant
+    std::vector ys{42.0, 42.0, 42.0};
+    auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
+    ASSERT(slope == 0.0);
+    ASSERT(intercept == 42.0);
+  }
+  { // Decreasing input: negative slope
+    std::vector ys{8.0, 4.0, 0.0};
+    auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
+    ASSERT(slope == -4.0);
+    ASSERT(intercept == 8.0);
+  }
+}
+
+void test_r2()
+{ // Checks compute_r2 for a perfect line and for a line with alternating noise
+  {
+    std::vector ys{1.0, 2.0, 3.0, 4.0, 5.0};
+    auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
+    const nvbench::float64_t actual = statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept);
+    const nvbench::float64_t expected = 1.0; // the fit passes through every point
+    ASSERT(std::abs(actual - expected) < 0.001);
+  }
+  {
+    std::vector signal{1.0, 2.0, 3.0, 4.0, 5.0};
+    std::vector noise{-1.0, 1.0, -1.0, 1.0, -1.0};
+    std::vector ys(signal.size()); // NOTE(review): element type is not spelled out in this copy - confirm
+
+    std::transform(std::begin(signal), // ys = signal + noise = {0, 3, 2, 5, 4}
+                   std::end(signal),
+                   std::begin(noise),
+                   std::begin(ys),
+                   std::plus());
+
+    auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
+    const nvbench::float64_t expected = 0.675; // 1 - 4.8 / 14.8 ~= 0.6757, assuming R2 = 1 - SS_res / SS_tot
+    const nvbench::float64_t actual = statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept);
+    ASSERT(std::abs(actual - expected) < 0.001);
+  }
+}
+
+void test_slope_conversion()
+{ // The expected values are atan(slope) expressed in degrees
+  {
+    const nvbench::float64_t actual = statistics::slope2deg(0.0);
+    const nvbench::float64_t expected = 0.0;
+    ASSERT(std::abs(actual - expected) < 0.001);
+  }
+  {
+    const nvbench::float64_t actual = statistics::slope2deg(1.0);
+    const nvbench::float64_t expected = 45.0;
+    ASSERT(std::abs(actual - expected) < 0.001);
+  }
+  {
+    const nvbench::float64_t actual = statistics::slope2deg(5.0);
+    const nvbench::float64_t expected = 78.69; // atan(5) ~= 78.6901 degrees
+    ASSERT(std::abs(actual - expected) < 0.001);
+  }
+}
+
+int main()
+{
+  test_mean();
+  test_std();
+  test_lin_regression();
+  test_r2();
+  test_slope_conversion();
+}
diff --git a/testing/stdrel_criterion.cu b/testing/stdrel_criterion.cu
new file mode 100644
index 0000000..57bdfdd
--- /dev/null
+++ b/testing/stdrel_criterion.cu
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 with the LLVM exception
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ *     http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include // NOTE(review): header names are missing in this copy of the patch - restore before applying
+#include
+#include
+
+#include "test_asserts.cuh"
+
+#include
+#include
+#include
+
+void test_const()
+{ // Identical measurements carry no noise, so the criterion is expected to finish
+  nvbench::criterion_params params;
+  nvbench::detail::stdrel_criterion criterion;
+
+  criterion.initialize(params);
+  for (int i = 0; i < 5; i++)
+  { // nvbench wants at least 5 to compute the standard deviation
+    criterion.add_measurement(42.0);
+  }
+  ASSERT(criterion.is_finished());
+}
+
+std::vector<double> generate(double mean, double rel_std_dev, int size)
+{ // Draws `size` samples from a normal distribution with std deviation `mean * rel_std_dev`
+  static std::mt19937::result_type seed = 0; // fixed seeds: every run of the test sees the same samples
+  std::mt19937 gen(seed++);                  // each call still gets its own stream
+  std::vector<double> v(static_cast<std::size_t>(size));
+  std::normal_distribution<double> dist(mean, mean * rel_std_dev);
+  std::generate(v.begin(), v.end(), [&]{ return dist(gen); });
+  return v;
+}
+
+void test_stdrel()
+{ // Expects the criterion to finish for noise below "max-noise" and to keep going for noise above it
+  const nvbench::int64_t size = 10;
+  const nvbench::float64_t mean = 42.0;
+  const nvbench::float64_t max_noise = 0.1;
+
+  nvbench::criterion_params params;
+  params.set_float64("max-noise", max_noise);
+
+  nvbench::detail::stdrel_criterion criterion;
+  criterion.initialize(params);
+
+  for (nvbench::float64_t measurement: generate(mean, max_noise / 2, size))
+  { // samples generated with half the allowed relative noise
+    criterion.add_measurement(measurement);
+  }
+  ASSERT(criterion.is_finished());
+
+  params.set_float64("max-noise", max_noise); // same threshold as in the first run
+  criterion.initialize(params); // presumably discards the earlier measurements - confirm in stdrel_criterion
+
+  for (nvbench::float64_t measurement: generate(mean, max_noise * 2, size))
+  { // samples generated with twice the allowed relative noise
+    criterion.add_measurement(measurement);
+  }
+  ASSERT(!criterion.is_finished());
+}
+
+int main()
+{
+  test_const();
+  test_stdrel();
+}