Merge pull request #151 from gevtushenko/entropy

Entropy-based stopping criterion
2026-05-11 00:40:00 +00:00 · 2024-01-12 23:19:25 +04:00
parent 7521229872 aebbeb371b
commit d8dced8a64
32 changed files with 1867 additions and 121 deletions
--- a/.clangd
+++ b/.clangd
@@ -0,0 +1,63 @@
+# https://clangd.llvm.org/config
+
+# Apply a config conditionally to all C files
+If:
+  PathMatch: .*\.(c|h)$
+
+---
+
+# Apply a config conditionally to all C++ files
+If:
+  PathMatch: .*\.(c|h)pp
+
+---
+
+# Apply a config conditionally to all CUDA files
+If:
+  PathMatch: .*\.cuh?
+CompileFlags:
+  Add:
+    # Allow variadic CUDA functions
+    - "-Xclang=-fcuda-allow-variadic-functions"
+
+---
+
+# Tweak the clangd parse settings for all files
+CompileFlags:
+  Compiler: clang++
+  CompilationDatabase: .
+  Add:
+    - -x
+    - cuda
+    # report all errors
+    - "-ferror-limit=0"
+    - "-ftemplate-backtrace-limit=0"
+    - "-stdlib=libc++"
+    - "-std=c++17"
+  Remove:
+    # strip CUDA fatbin args
+    - "-Xfatbin*"
+    - "-Xcompiler*"
+    - "-Xcudafe*"
+    - "-rdc=*"
+    - "-gpu=*"
+    - "--diag_suppress*"
+    # strip CUDA arch flags
+    - "-gencode*"
+    - "--generate-code*"
+    # strip gcc's -fcoroutines
+    - -fcoroutines
+    # strip CUDA flags unknown to clang
+    - "-ccbin*"
+    - "--compiler-options*"
+    - "--expt-extended-lambda"
+    - "--expt-relaxed-constexpr"
+    - "-forward-unknown-to-host-compiler"
+    - "-Werror=cross-execution-space-call"
+Diagnostics:
+  Suppress:
+    - "variadic_device_fn"
+    - "attributes_not_allowed"
+    # The NVHPC version of _NVCXX_EXPAND_PACK macro triggers this clang error.
+    # Temporarily suppressing it, but should probably fix
+    - "template_param_shadow"
--- a/docs/cli_help.md
+++ b/docs/cli_help.md
@@ -89,8 +89,15 @@
  * Applies to the most recent `--benchmark`, or all benchmarks if specified
    before any `--benchmark` arguments.

+* `--stopping-criterion <criterion>`
+  * After `--min-samples` is satisfied, use `<criterion>` to decide whether
+    enough samples have been collected.
+  * Only applies to Cold measurements.
+  * Default is `stdrel` (`--stopping-criterion stdrel`)
+
 * `--min-time <seconds>`
  * Accumulate at least `<seconds>` of execution time per measurement.
+  * Only applies to `stdrel` stopping criterion.
  * Default is 0.5 seconds.
  * If both GPU and CPU times are gathered, this applies to GPU time only.
  * Applies to the most recent `--benchmark`, or all benchmarks if specified
@@ -100,6 +107,7 @@
  * Gather samples until the error in the measurement drops below `<value>`.
  * Noise is specified as the percent relative standard deviation.
  * Default is 0.5% (`--max-noise 0.5`)
+  * Only applies to `stdrel` stopping criterion.
  * Only applies to Cold measurements.
  * If both GPU and CPU times are gathered, this applies to GPU noise only.
  * Applies to the most recent `--benchmark`, or all benchmarks if specified
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,12 +1,13 @@
 set(example_srcs
+  auto_throughput.cu
  axes.cu
+  custom_criterion.cu
  enums.cu
  exec_tag_sync.cu
  exec_tag_timer.cu
  skip.cu
  stream.cu
  throughput.cu
-  auto_throughput.cu
 )

 # Metatarget for all examples:
--- a/examples/custom_criterion.cu
+++ b/examples/custom_criterion.cu
@@ -0,0 +1,80 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <nvbench/nvbench.cuh>
+
+// Grab some testing kernels from NVBench:
+#include <nvbench/test_kernels.cuh>
+
+// Thrust vectors simplify memory management:
+#include <thrust/device_vector.h>
+
+// Inherit from the stopping_criterion_base class:
+class fixed_criterion final : public nvbench::stopping_criterion_base
+{
+  nvbench::int64_t m_num_samples{}; // Measurements seen since the last `do_initialize()`
+
+public:
+  fixed_criterion() // Passes the name "fixed" and a "max-samples" value of 42 to the base class
+      : nvbench::stopping_criterion_base{"fixed", {{"max-samples", nvbench::int64_t{42}}}}
+  {}
+
+protected:
+  // Reset the criterion's state in the `do_initialize()` method:
+  virtual void do_initialize() override 
+  {
+    m_num_samples = 0;
+  }
+
+  // Process new measurements in the `do_add_measurement()` method:
+  virtual void do_add_measurement(nvbench::float64_t /* measurement */) override
+  {
+    m_num_samples++;
+  }
+
+  // Check if the stopping criterion is met in the `do_is_finished()` method:
+  virtual bool do_is_finished() override
+  {
+    return m_num_samples >= m_params.get_int64("max-samples");
+  }
+
+};
+
+// Register the criterion with NVBench:
+NVBENCH_REGISTER_CRITERION(fixed_criterion);
+
+void throughput_bench(nvbench::state &state)
+{
+  // Allocate input and output buffers (64 MiB of int32 values each):
+  const std::size_t num_values = 64 * 1024 * 1024 / sizeof(nvbench::int32_t);
+  thrust::device_vector<nvbench::int32_t> input(num_values);
+  thrust::device_vector<nvbench::int32_t> output(num_values);
+
+  // Provide throughput information:
+  state.add_element_count(num_values, "NumElements");
+  state.add_global_memory_reads<nvbench::int32_t>(num_values, "DataSize");
+  state.add_global_memory_writes<nvbench::int32_t>(num_values);
+  // Run the copy kernel on the launch's stream, using the `no_batch` exec tag:
+  state.exec(nvbench::exec_tag::no_batch, [&input, &output, num_values](nvbench::launch &launch) {
+    nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
+      thrust::raw_pointer_cast(input.data()),
+      thrust::raw_pointer_cast(output.data()),
+      num_values);
+  });
+}
+NVBENCH_BENCH(throughput_bench).set_stopping_criterion("fixed");
--- a/nvbench/CMakeLists.txt
+++ b/nvbench/CMakeLists.txt
@@ -4,6 +4,7 @@ set(srcs
  benchmark_base.cxx
  benchmark_manager.cxx
  blocking_kernel.cu
+  criterion_manager.cxx
  csv_printer.cu
  cuda_call.cu
  device_info.cu
@@ -17,13 +18,16 @@ set(srcs
  printer_multiplex.cxx
  runner.cxx
  state.cxx
+  stopping_criterion.cxx
  string_axis.cxx
  type_axis.cxx
  type_strings.cxx

+  detail/entropy_criterion.cxx
  detail/measure_cold.cu
  detail/measure_hot.cu
  detail/state_generator.cxx
+  detail/stdrel_criterion.cxx

  internal/nvml.cxx
 )
--- a/nvbench/benchmark_base.cuh
+++ b/nvbench/benchmark_base.cuh
@@ -22,6 +22,7 @@
 #include <nvbench/device_info.cuh>
 #include <nvbench/device_manager.cuh>
 #include <nvbench/state.cuh>
+#include <nvbench/stopping_criterion.cuh>

 #include <functional> // reference_wrapper, ref
 #include <memory>
@@ -181,22 +182,30 @@ struct benchmark_base
  }
  /// @}

-  /// Accumulate at least this many seconds of timing data per measurement. @{
-  [[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
+  /// Accumulate at least this many seconds of timing data per measurement.
+  /// Only applies to `stdrel` stopping criterion. @{
+  [[nodiscard]] nvbench::float64_t get_min_time() const
+  {
+    return m_criterion_params.get_float64("min-time");
+  }
  benchmark_base &set_min_time(nvbench::float64_t min_time)
  {
-    m_min_time = min_time;
+    m_criterion_params.set_float64("min-time", min_time);
    return *this;
  }
  /// @}

  /// Specify the maximum amount of noise if a measurement supports noise.
  /// Noise is the relative standard deviation:
-  /// `noise = stdev / mean_time`. @{
-  [[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; }
+  /// `noise = stdev / mean_time`. 
+  /// Only applies to `stdrel` stopping criterion. @{
+  [[nodiscard]] nvbench::float64_t get_max_noise() const
+  {
+    return m_criterion_params.get_float64("max-noise");
+  }
  benchmark_base &set_max_noise(nvbench::float64_t max_noise)
  {
-    m_max_noise = max_noise;
+    m_criterion_params.set_float64("max-noise", max_noise);
    return *this;
  }
  /// @}
@@ -230,6 +239,19 @@ struct benchmark_base
  }
  /// @}

+  [[nodiscard]] nvbench::criterion_params& get_criterion_params() { return m_criterion_params; }
+  [[nodiscard]] const nvbench::criterion_params& get_criterion_params() const { return m_criterion_params; }
+
+  /// Control the stopping criterion for the measurement loop.
+  /// @{
+  [[nodiscard]] const std::string& get_stopping_criterion() const { return m_stopping_criterion; }
+  benchmark_base &set_stopping_criterion(std::string criterion)
+  {
+    m_stopping_criterion = std::move(criterion);
+    return *this;
+  }
+  /// @}
+
 protected:
  friend struct nvbench::runner_base;

@@ -247,12 +269,13 @@ protected:
  bool m_disable_blocking_kernel{false};

  nvbench::int64_t m_min_samples{10};
-  nvbench::float64_t m_min_time{0.5};
-  nvbench::float64_t m_max_noise{0.005}; // 0.5% relative standard deviation

  nvbench::float64_t m_skip_time{-1.};
  nvbench::float64_t m_timeout{15.};

+  nvbench::criterion_params m_criterion_params;
+  std::string m_stopping_criterion{"stdrel"};
+
 private:
  // route these through virtuals so the templated subclass can inject type info
  virtual std::unique_ptr<benchmark_base> do_clone() const            = 0;
--- a/nvbench/benchmark_base.cxx
+++ b/nvbench/benchmark_base.cxx
@@ -34,13 +34,14 @@ std::unique_ptr<benchmark_base> benchmark_base::clone() const
  result->m_axes    = m_axes;
  result->m_devices = m_devices;

-  result->m_min_samples = m_min_samples;
-  result->m_min_time    = m_min_time;
-  result->m_max_noise   = m_max_noise;
+  result->m_min_samples      = m_min_samples;
+  result->m_criterion_params = m_criterion_params;

  result->m_skip_time = m_skip_time;
  result->m_timeout   = m_timeout;

+  result->m_stopping_criterion = m_stopping_criterion;
+
  return result;
 }

--- a/nvbench/criterion_manager.cuh
+++ b/nvbench/criterion_manager.cuh
@@ -0,0 +1,65 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#pragma once
+
+#include <nvbench/detail/entropy_criterion.cuh>
+#include <nvbench/detail/stdrel_criterion.cuh>
+#include <nvbench/stopping_criterion.cuh>
+#include <nvbench/types.cuh>
+
+#include <memory>
+
+#include <unordered_map>
+
+namespace nvbench
+{
+
+class criterion_manager // Name-keyed registry of stopping criteria
+{
+  std::unordered_map<std::string, std::unique_ptr<nvbench::stopping_criterion_base>> m_map;
+
+  criterion_manager(); // Private: instances are only obtained through `get()`
+
+public:
+  /**
+   * @return The singleton criterion_manager instance.
+   */
+  static criterion_manager& get();
+
+  /**
+   * Register a new stopping criterion under its own name and return a reference to the stored instance.
+   */
+  nvbench::stopping_criterion_base& add(std::unique_ptr<nvbench::stopping_criterion_base> criterion);
+  nvbench::stopping_criterion_base& get_criterion(const std::string& name); // Errors via NVBENCH_THROW if `name` is unknown
+  const nvbench::stopping_criterion_base& get_criterion(const std::string& name) const; // Same lookup, const access
+
+  using params_description = std::vector<std::pair<std::string, nvbench::named_values::type>>;
+  params_description get_params_description() const; // (name, type) pairs gathered from every registered criterion
+};
+
+/**
+ * Given a stopping criterion type `TYPE`, registers it in the criterion manager
+ *
+ * See the `custom_criterion.cu` example for usage.
+ */
+#define NVBENCH_REGISTER_CRITERION(TYPE)                                                           \
+  static nvbench::stopping_criterion_base &NVBENCH_UNIQUE_IDENTIFIER(TYPE) =                       \
+    nvbench::criterion_manager::get().add(std::make_unique<TYPE>())
+
+} // namespace nvbench
--- a/nvbench/criterion_manager.cxx
+++ b/nvbench/criterion_manager.cxx
@@ -0,0 +1,100 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <nvbench/criterion_manager.cuh>
+#include <nvbench/detail/throw.cuh>
+
+namespace nvbench
+{
+
+criterion_manager::criterion_manager()
+{
+  m_map.emplace("stdrel", std::make_unique<nvbench::detail::stdrel_criterion>());
+  m_map.emplace("entropy", std::make_unique<nvbench::detail::entropy_criterion>());
+}
+
+criterion_manager &criterion_manager::get()
+{
+  static criterion_manager registry;
+  return registry;
+}
+
+stopping_criterion_base& criterion_manager::get_criterion(const std::string& name)
+{
+  auto iter = m_map.find(name);
+  if (iter == m_map.end())
+  {
+    NVBENCH_THROW(std::runtime_error, "No stopping criterion named \"{}\".", name);
+  }
+  return *iter->second.get();
+}
+
+const nvbench::stopping_criterion_base& criterion_manager::get_criterion(const std::string& name) const
+{
+  auto iter = m_map.find(name);
+  if (iter == m_map.end())
+  {
+    NVBENCH_THROW(std::runtime_error, "No stopping criterion named \"{}\".", name);
+  }
+  return *iter->second.get();
+}
+
+stopping_criterion_base &criterion_manager::add(std::unique_ptr<stopping_criterion_base> criterion)
+{
+  const std::string name = criterion->get_name();
+
+  auto [it, success] = m_map.emplace(name, std::move(criterion));
+
+  if (!success) 
+  {
+    NVBENCH_THROW(std::runtime_error,
+                  "Stopping criterion \"{}\" is already registered.", name);
+  }
+
+  return *it->second.get();
+}
+
+nvbench::criterion_manager::params_description criterion_manager::get_params_description() const
+{
+  nvbench::criterion_manager::params_description desc;
+  // Gather the (name, type) of every parameter declared by every registered criterion.
+  for (const auto &[criterion_name, criterion] : m_map)
+  {
+    nvbench::criterion_params params = criterion->get_params();
+    // A name may be shared between criteria only if its type is the same everywhere.
+    for (const auto &param : params.get_names())
+    {
+      const nvbench::named_values::type type = params.get_type(param);
+      if (std::find_if(desc.begin(), desc.end(), [&](const auto &d) {
+            return d.first == param && d.second != type;
+          }) != desc.end())
+      {
+        NVBENCH_THROW(std::runtime_error,
+                      "Stopping criterion \"{}\" parameter \"{}\" is already used by another "
+                      "criterion with a different type.",
+                      criterion_name,
+                      param);
+      }
+      desc.emplace_back(param, type);
+    }
+  }
+
+  return desc;
+}
+
+} // namespace nvbench
--- a/nvbench/detail/entropy_criterion.cuh
+++ b/nvbench/detail/entropy_criterion.cuh
@@ -0,0 +1,55 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#pragma once
+
+#include <nvbench/types.cuh>
+#include <nvbench/stopping_criterion.cuh>
+#include <nvbench/detail/ring_buffer.cuh>
+
+#include <vector>
+
+namespace nvbench::detail
+{
+
+class entropy_criterion final : public stopping_criterion_base
+{
+  // state
+  nvbench::int64_t m_total_samples{};
+  nvbench::float64_t m_total_cuda_time{};
+  std::vector<std::pair<nvbench::float64_t, nvbench::int64_t>> m_freq_tracker;
+
+  // TODO The window size should be user-configurable
+  nvbench::detail::ring_buffer<nvbench::float64_t> m_entropy_tracker{299};
+
+  // Used to avoid re-allocating temporary memory
+  std::vector<nvbench::float64_t> m_probabilities; 
+
+  nvbench::float64_t compute_entropy();
+
+public:
+  entropy_criterion();
+
+protected:
+  virtual void do_initialize() override;
+  virtual void do_add_measurement(nvbench::float64_t measurement) override;
+  virtual bool do_is_finished() override;
+  
+};
+
+} // namespace nvbench::detail
--- a/nvbench/detail/entropy_criterion.cxx
+++ b/nvbench/detail/entropy_criterion.cxx
@@ -0,0 +1,137 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <nvbench/detail/entropy_criterion.cuh>
+#include <nvbench/types.cuh>
+
+#include <cmath>
+
+
+namespace nvbench::detail
+{
+
+entropy_criterion::entropy_criterion()
+    : stopping_criterion_base{"entropy", {{"max-angle", 0.048}, {"min-r2", 0.36}}}
+{
+  m_freq_tracker.reserve(m_entropy_tracker.capacity() * 2);
+  m_probabilities.reserve(m_entropy_tracker.capacity() * 2);
+}
+
+void entropy_criterion::do_initialize()
+{
+  m_total_samples   = 0;
+  m_total_cuda_time = 0.0;
+  m_entropy_tracker.clear();
+  m_freq_tracker.clear();
+}
+
+nvbench::float64_t entropy_criterion::compute_entropy() 
+{
+  const std::size_t n = m_freq_tracker.size();
+  if (n == 0)
+  {
+    return 0.0;
+  }
+
+  m_probabilities.resize(n);
+  for (std::size_t i = 0; i < n; i++)
+  {
+    m_probabilities[i] = static_cast<nvbench::float64_t>(m_freq_tracker[i].second) /
+                         static_cast<nvbench::float64_t>(m_total_samples);
+  }
+
+  nvbench::float64_t entropy{};
+  for (nvbench::float64_t p : m_probabilities)
+  {
+    entropy -= p * std::log2(p);
+  }
+
+  return entropy;
+}
+
+void entropy_criterion::do_add_measurement(nvbench::float64_t measurement)
+{
+  m_total_samples++;
+  m_total_cuda_time += measurement;
+  // Count this measurement in the (value -> occurrences) histogram:
+  {
+    auto key = measurement;
+    constexpr bool bin_keys = false;
+
+    if constexpr (bin_keys) // Optional rounding of keys to multiples of `epsilon`; disabled
+    {
+      const auto resolution_us = 0.5;
+      const auto resolution_s = resolution_us / 1'000'000;
+      const auto epsilon = resolution_s * 2;
+      key = std::round(key / epsilon) * epsilon;
+    }
+
+    // m_freq_tracker is kept sorted by key. About 3x faster than `std::{unordered_,}map`;
+    // up to 100k samples, only about 20% slower than corresponding stdrel method
+    auto it = std::lower_bound(m_freq_tracker.begin(),
+                               m_freq_tracker.end(),
+                               std::make_pair(key, nvbench::int64_t{}));
+
+    if (it != m_freq_tracker.end() && it->first == key)
+    {
+      it->second += 1;
+    }
+    else
+    {
+      m_freq_tracker.insert(it, std::make_pair(key, nvbench::int64_t{1}));
+    }
+  }
+  // Record the entropy of the updated histogram; do_is_finished() inspects this history:
+  m_entropy_tracker.push_back(compute_entropy());
+}
+
+bool entropy_criterion::do_is_finished()
+{
+  if (m_entropy_tracker.size() < 2)
+  {
+    return false;
+  }
+
+  // Only run the regression below on even sample counts. The skip exists to reduce overhead
+  // (not needed to compute entropy); it makes `is_finished()` about 20% faster than stdrel.
+  if (m_total_samples % 2 != 0)
+  {
+    return false;
+  }
+  // Fit a line to the tracked entropy values:
+  auto begin = m_entropy_tracker.cbegin();
+  auto end   = m_entropy_tracker.cend();
+  auto mean  = statistics::compute_mean(begin, end);
+
+  const auto [slope, intercept] = statistics::compute_linear_regression(begin, end, mean);
+  // Keep sampling while the slope, converted by slope2deg(), exceeds `max-angle`:
+  if (statistics::slope2deg(slope) > m_params.get_float64("max-angle")) 
+  {
+    return false;
+  }
+  // Also keep sampling while the value from compute_r2() is below `min-r2`:
+  const auto r2 = statistics::compute_r2(begin, end, mean, slope, intercept);
+  if (r2 < m_params.get_float64("min-r2"))
+  {
+    return false;
+  }
+
+  return true;
+}
+
+} // namespace nvbench::detail
--- a/nvbench/detail/measure_cold.cu
+++ b/nvbench/detail/measure_cold.cu
@@ -16,38 +16,37 @@
 *  limitations under the License.
 */

-#include <nvbench/detail/measure_cold.cuh>
-
 #include <nvbench/benchmark_base.cuh>
+#include <nvbench/criterion_manager.cuh>
+#include <nvbench/detail/measure_cold.cuh>
+#include <nvbench/detail/throw.cuh>
 #include <nvbench/device_info.cuh>
 #include <nvbench/printer_base.cuh>
 #include <nvbench/state.cuh>
 #include <nvbench/summary.cuh>

-#include <nvbench/detail/ring_buffer.cuh>
-#include <nvbench/detail/throw.cuh>
-
 #include <fmt/format.h>

-#include <algorithm>
-#include <cstdio>
-#include <stdexcept>
-#include <variant>
-
 namespace nvbench::detail
 {

 measure_cold_base::measure_cold_base(state &exec_state)
    : m_state{exec_state}
    , m_launch{m_state.get_cuda_stream()}
+    , m_criterion_params{exec_state.get_criterion_params()}
+    , m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(exec_state.get_stopping_criterion())}
    , m_run_once{exec_state.get_run_once()}
    , m_no_block{exec_state.get_disable_blocking_kernel()}
    , m_min_samples{exec_state.get_min_samples()}
-    , m_max_noise{exec_state.get_max_noise()}
-    , m_min_time{exec_state.get_min_time()}
    , m_skip_time{exec_state.get_skip_time()}
    , m_timeout{exec_state.get_timeout()}
-{}
+{
+  if (m_min_samples > 0)
+  {
+    m_cuda_times.reserve(m_min_samples);
+    m_cpu_times.reserve(m_min_samples);
+  }
+}

 void measure_cold_base::check()
 {
@@ -68,10 +67,11 @@ void measure_cold_base::initialize()
  m_total_cpu_time  = 0.;
  m_cpu_noise       = 0.;
  m_total_samples   = 0;
-  m_noise_tracker.clear();
  m_cuda_times.clear();
  m_cpu_times.clear();
  m_max_time_exceeded = false;
+
+  m_stopping_criterion.initialize(m_criterion_params);
 }

 void measure_cold_base::run_trials_prologue() { m_walltime_timer.start(); }
@@ -87,16 +87,7 @@ void measure_cold_base::record_measurements()
  m_total_cpu_time += cur_cpu_time;
  ++m_total_samples;

-  // Compute convergence statistics using CUDA timings:
-  const auto mean_cuda_time = m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples);
-  const auto cuda_stdev     = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
-                                                                          m_cuda_times.cend(),
-                                                                          mean_cuda_time);
-  auto cuda_rel_stdev       = cuda_stdev / mean_cuda_time;
-  if (std::isfinite(cuda_rel_stdev))
-  {
-    m_noise_tracker.push_back(cuda_rel_stdev);
-  }
+  m_stopping_criterion.add_measurement(cur_cuda_time);
 }

 bool measure_cold_base::is_finished()
@@ -107,39 +98,12 @@ bool measure_cold_base::is_finished()
  }

  // Check that we've gathered enough samples:
-  if (m_total_cuda_time > m_min_time && m_total_samples > m_min_samples)
+  if (m_total_samples > m_min_samples)
  {
-    // Noise has dropped below threshold
-    if (m_noise_tracker.back() < m_max_noise)
+    if (m_stopping_criterion.is_finished())
    {
      return true;
    }
-
-    // Check if the noise (cuda rel stdev) has converged by inspecting a
-    // trailing window of recorded noise measurements.
-    // This helps identify benchmarks that are inherently noisy and would
-    // never converge to the target stdev threshold. This check ensures that the
-    // benchmark will end if the stdev stabilizes above the target threshold.
-    // Gather some iterations before checking noise, and limit how often we
-    // check this.
-    if (m_noise_tracker.size() > 64 && (m_total_samples % 16 == 0))
-    {
-      // Use the current noise as the stdev reference.
-      const auto current_noise = m_noise_tracker.back();
-      const auto noise_stdev =
-        nvbench::detail::statistics::standard_deviation(m_noise_tracker.cbegin(),
-                                                        m_noise_tracker.cend(),
-                                                        current_noise);
-      const auto noise_rel_stdev = noise_stdev / current_noise;
-
-      // If the rel stdev of the last N cuda noise measurements is less than
-      // 5%, consider the result stable.
-      const auto noise_threshold = 0.05;
-      if (noise_rel_stdev < noise_threshold)
-      {
-        return true;
-      }
-    }
  }

  // Check for timeouts:
@@ -206,14 +170,21 @@ void measure_cold_base::generate_summaries()
    summ.set_float64("value", avg_cuda_time);
  }

+  const auto mean_cuda_time = m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples);
+  const auto cuda_stdev     = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
+                                                                          m_cuda_times.cend(),
+                                                                          mean_cuda_time);
+  const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
+  const auto noise = cuda_rel_stdev;
+  const auto max_noise = m_criterion_params.get_float64("max-noise");
+  const auto min_time = m_criterion_params.get_float64("min-time");
+
  {
    auto &summ = m_state.add_summary("nv/cold/time/gpu/stdev/relative");
    summ.set_string("name", "Noise");
    summ.set_string("hint", "percentage");
    summ.set_string("description", "Relative standard deviation of isolated GPU times");
-    summ.set_float64("value",
-                     m_noise_tracker.empty() ? std::numeric_limits<nvbench::float64_t>::infinity()
-                                             : m_noise_tracker.back());
+    summ.set_float64("value", noise);
  }

  if (const auto items = m_state.get_element_count(); items != 0)
@@ -270,15 +241,15 @@ void measure_cold_base::generate_summaries()
    {
      const auto timeout = m_walltime_timer.get_duration();

-      if (!m_noise_tracker.empty() && m_noise_tracker.back() > m_max_noise)
+      if (noise > max_noise)
      {
        printer.log(nvbench::log_level::warn,
                    fmt::format("Current measurement timed out ({:0.2f}s) "
                                "while over noise threshold ({:0.2f}% > "
                                "{:0.2f}%)",
                                timeout,
-                                m_noise_tracker.back() * 100,
-                                m_max_noise * 100));
+                                noise * 100,
+                                max_noise * 100));
      }
      if (m_total_samples < m_min_samples)
      {
@@ -289,7 +260,7 @@ void measure_cold_base::generate_summaries()
                                m_total_samples,
                                m_min_samples));
      }
-      if (m_total_cuda_time < m_min_time)
+      if (m_total_cuda_time < min_time)
      {
        printer.log(nvbench::log_level::warn,
                    fmt::format("Current measurement timed out ({:0.2f}s) "
@@ -297,7 +268,7 @@ void measure_cold_base::generate_summaries()
                                "{:0.2f}s)",
                                timeout,
                                m_total_cuda_time,
-                                m_min_time));
+                                min_time));
      }
    }

--- a/nvbench/detail/measure_cold.cuh
+++ b/nvbench/detail/measure_cold.cuh
@@ -25,15 +25,14 @@
 #include <nvbench/device_info.cuh>
 #include <nvbench/exec_tag.cuh>
 #include <nvbench/launch.cuh>
+#include <nvbench/stopping_criterion.cuh>

 #include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
 #include <nvbench/detail/l2flush.cuh>
-#include <nvbench/detail/ring_buffer.cuh>
 #include <nvbench/detail/statistics.cuh>

 #include <cuda_runtime.h>

-#include <algorithm>
 #include <utility>
 #include <vector>

@@ -87,12 +86,13 @@ protected:
  nvbench::detail::l2flush m_l2flush;
  nvbench::blocking_kernel m_blocker;

+  nvbench::criterion_params m_criterion_params;
+  nvbench::stopping_criterion_base& m_stopping_criterion;
+
  bool m_run_once{false};
  bool m_no_block{false};

  nvbench::int64_t m_min_samples{};
-  nvbench::float64_t m_max_noise{}; // rel stdev
-  nvbench::float64_t m_min_time{};

  nvbench::float64_t m_skip_time{};
  nvbench::float64_t m_timeout{};
@@ -102,9 +102,6 @@ protected:
  nvbench::float64_t m_total_cpu_time{};
  nvbench::float64_t m_cpu_noise{}; // rel stdev

-  // Trailing history of noise measurements for convergence tests
-  nvbench::detail::ring_buffer<nvbench::float64_t> m_noise_tracker{512};
-
  std::vector<nvbench::float64_t> m_cuda_times;
  std::vector<nvbench::float64_t> m_cpu_times;

--- a/nvbench/detail/ring_buffer.cuh
+++ b/nvbench/detail/ring_buffer.cuh
@@ -22,12 +22,91 @@

 #include <nvbench/detail/statistics.cuh>

+#include <cstddef>
+#include <iterator>
 #include <cassert>
 #include <vector>

 namespace nvbench::detail
 {

+template <class T>
+class ring_buffer_iterator // Iterator over ring-buffer storage; wraps its index on dereference
+{
+  std::ptrdiff_t m_index;    // Logical position; reduced modulo m_capacity when dereferenced
+  std::ptrdiff_t m_capacity; // Modulus applied to m_index on dereference
+  T *m_ptr;                  // Base pointer that the wrapped index is applied to
+
+public:
+  using iterator_category = std::random_access_iterator_tag;
+  using value_type        = T;
+  using difference_type   = std::ptrdiff_t;
+  using pointer           = T *;
+  using reference         = T &;
+
+  ring_buffer_iterator(std::ptrdiff_t index, std::ptrdiff_t capacity, pointer ptr)
+      : m_index{index}
+      , m_capacity{capacity}
+      , m_ptr{ptr}
+  {}
+
+  ring_buffer_iterator &operator++() // Pre-increment returns *this by reference, like operator--()
+  {
+    ++m_index;
+    return *this;
+  }
+
+  ring_buffer_iterator operator++(int)
+  {
+    ring_buffer_iterator temp = *this;
+    ++(*this);
+    return temp;
+  }
+
+  ring_buffer_iterator &operator--()
+  {
+    --m_index;
+    return *this;
+  }
+
+  ring_buffer_iterator operator--(int)
+  {
+    ring_buffer_iterator temp = *this;
+    --(*this);
+    return temp;
+  }
+
+  ring_buffer_iterator operator+(difference_type n) const
+  {
+    return ring_buffer_iterator(m_index + n, m_capacity, m_ptr);
+  }
+
+  ring_buffer_iterator operator-(difference_type n) const
+  {
+    return ring_buffer_iterator(m_index - n, m_capacity, m_ptr);
+  }
+
+  difference_type operator-(const ring_buffer_iterator &other) const
+  {
+    return m_index - other.m_index;
+  }
+
+  reference operator*() const { return m_ptr[m_index % m_capacity]; } // Wraps past the last slot
+  pointer operator->() const { return &(operator*()); }
+
+  reference operator[](difference_type n) const { return *(*this + n); }
+
+  bool operator==(const ring_buffer_iterator &other) const
+  {
+    return m_ptr == other.m_ptr && m_index == other.m_index;
+  }
+  bool operator!=(const ring_buffer_iterator &other) const { return !(*this == other); }
+  bool operator<(const ring_buffer_iterator &other) const { return m_index < other.m_index; }
+  bool operator>(const ring_buffer_iterator &other) const { return m_index > other.m_index; }
+  bool operator<=(const ring_buffer_iterator &other) const { return !(*this > other); }
+  bool operator>=(const ring_buffer_iterator &other) const { return !(*this < other); }
+};
+
 /**
 * @brief A simple, dynamically sized ring buffer.
 */
@@ -42,7 +121,13 @@ private:
  std::size_t m_index{0};
  bool m_full{false};

+  std::size_t get_front_index() const 
+  {
+    return m_full ? m_index : 0;
+  }
+
 public:
+
  /**
   * Create a new ring buffer with the requested capacity.
   */
@@ -51,17 +136,48 @@ public:
  {}

  /**
-   * Iterators provide all values in the ring buffer in unspecified order.
+   * Iterators provide all values in the ring buffer in FIFO order.
   * @{
   */
-  // clang-format off
-  [[nodiscard]] auto begin()        { return m_buffer.begin(); }
-  [[nodiscard]] auto begin() const  { return m_buffer.begin(); }
-  [[nodiscard]] auto cbegin() const { return m_buffer.cbegin(); }
-  [[nodiscard]] auto end()        { return m_buffer.begin()  + static_cast<diff_t>(this->size()); }
-  [[nodiscard]] auto end() const  { return m_buffer.begin()  + static_cast<diff_t>(this->size()); }
-  [[nodiscard]] auto cend() const { return m_buffer.cbegin() + static_cast<diff_t>(this->size()); }
-  // clang-format on
+  [[nodiscard]] ring_buffer_iterator<T> begin()
+  {
+    return {static_cast<std::ptrdiff_t>(get_front_index()),
+            static_cast<std::ptrdiff_t>(capacity()),
+            m_buffer.data()};
+  }
+
+  [[nodiscard]] ring_buffer_iterator<T> end()
+  {
+    return {static_cast<std::ptrdiff_t>(get_front_index() + size()),
+            static_cast<std::ptrdiff_t>(capacity()),
+            m_buffer.data()};
+  }
+  [[nodiscard]] ring_buffer_iterator<const T> begin() const
+  {
+    return {static_cast<std::ptrdiff_t>(get_front_index()),
+            static_cast<std::ptrdiff_t>(capacity()),
+            m_buffer.data()};
+  }
+
+  [[nodiscard]] ring_buffer_iterator<const T> end() const
+  {
+    return {static_cast<std::ptrdiff_t>(get_front_index() + size()),
+            static_cast<std::ptrdiff_t>(capacity()),
+            m_buffer.data()};
+  }
+  [[nodiscard]] ring_buffer_iterator<const T> cbegin() const
+  {
+    return {static_cast<std::ptrdiff_t>(get_front_index()),
+            static_cast<std::ptrdiff_t>(capacity()),
+            m_buffer.data()};
+  }
+
+  [[nodiscard]] ring_buffer_iterator<const T> cend() const
+  {
+    return {static_cast<std::ptrdiff_t>(get_front_index() + size()),
+            static_cast<std::ptrdiff_t>(capacity()),
+            m_buffer.data()};
+  }
  /** @} */

  /**
--- a/nvbench/detail/statistics.cuh
+++ b/nvbench/detail/statistics.cuh
@@ -18,14 +18,15 @@

 #pragma once

-#include <nvbench/types.cuh>
-
 #include <nvbench/detail/transform_reduce.cuh>
+#include <nvbench/types.cuh>

 #include <cmath>
 #include <functional>
 #include <iterator>
 #include <limits>
+#include <numeric>
+
 #include <type_traits>

 namespace nvbench::detail::statistics
@@ -41,7 +42,8 @@ ValueType standard_deviation(Iter first, Iter last, ValueType mean)
 {
  static_assert(std::is_floating_point_v<ValueType>);

-  const auto num = last - first;
+  const auto num = std::distance(first, last);
+
  if (num < 5) // don't bother with low sample sizes.
  {
    return std::numeric_limits<ValueType>::infinity();
@@ -56,8 +58,135 @@ ValueType standard_deviation(Iter first, Iter last, ValueType mean)
                                                            val *= val;
                                                            return val;
                                                          }) /
-                        static_cast<ValueType>((num - 1));
+                        static_cast<ValueType>((num - 1)); // Bessel’s correction
  return std::sqrt(variance);
 }

+/**
+ * Computes and returns the mean.
+ *
+ * If the input has fewer than 1 sample, infinity is returned.
+ */
+template <class It>
+nvbench::float64_t compute_mean(It first, It last)
+{
+  const auto num = std::distance(first, last);
+
+  if (num < 1)
+  {
+    return std::numeric_limits<nvbench::float64_t>::infinity();
+  }
+
+  return std::accumulate(first, last, 0.0) / static_cast<nvbench::float64_t>(num);
+}
+
+/**
+ * Computes linear regression and returns the slope and intercept
+ *
+ * This version takes precomputed mean of [first, last).
+ * If the input has fewer than 2 samples, infinity is returned for both slope and intercept.
+ */
+template <class It>
+std::pair<nvbench::float64_t, nvbench::float64_t>
+compute_linear_regression(It first, It last, nvbench::float64_t mean_y)
+{
+  const std::size_t n = static_cast<std::size_t>(std::distance(first, last));
+
+  if (n < 2)
+  {
+    return std::make_pair(std::numeric_limits<nvbench::float64_t>::infinity(),
+                          std::numeric_limits<nvbench::float64_t>::infinity());
+  }
+
+  // Assuming x starts from 0
+  const nvbench::float64_t mean_x = (static_cast<nvbench::float64_t>(n) - 1.0) / 2.0;
+
+  // Calculate the numerator and denominator for the slope
+  nvbench::float64_t numerator   = 0.0;
+  nvbench::float64_t denominator = 0.0;
+
+  for (std::size_t i = 0; i < n; ++i, ++first)
+  {
+    const nvbench::float64_t x_diff = static_cast<nvbench::float64_t>(i) - mean_x;
+    numerator += x_diff * (*first - mean_y);
+    denominator += x_diff * x_diff;
+  }
+
+  // Calculate the slope and intercept
+  const nvbench::float64_t slope     = numerator / denominator;
+  const nvbench::float64_t intercept = mean_y - slope * mean_x;
+
+  return std::make_pair(slope, intercept);
+}
+
+/**
+ * Computes linear regression and returns the slope and intercept
+ *
+ * If the input has fewer than 2 samples, infinity is returned for both slope and intercept.
+ */
+template <class It>
+std::pair<nvbench::float64_t, nvbench::float64_t> compute_linear_regression(It first, It last)
+{
+  return compute_linear_regression(first, last, compute_mean(first, last));
+}
+
+/**
+ * Computes and returns the R^2 (coefficient of determination)
+ *
+ * This version takes precomputed mean of [first, last).
+ */
+template <class It>
+nvbench::float64_t compute_r2(It first,
+                              It last,
+                              nvbench::float64_t mean_y,
+                              nvbench::float64_t slope,
+                              nvbench::float64_t intercept)
+{
+  const std::size_t n = static_cast<std::size_t>(std::distance(first, last));
+
+  nvbench::float64_t ss_tot = 0.0;
+  nvbench::float64_t ss_res = 0.0;
+
+  for (std::size_t i = 0; i < n; ++i, ++first)
+  {
+    const nvbench::float64_t y = *first;
+    const nvbench::float64_t y_pred = slope * static_cast<nvbench::float64_t>(i) + intercept;
+
+    ss_tot += (y - mean_y) * (y - mean_y);
+    ss_res += (y - y_pred) * (y - y_pred);
+  }
+
+  if (ss_tot == 0.0)
+  {
+    return 1.0;
+  }
+
+  return 1.0 - ss_res / ss_tot;
+}
+
+/**
+ * Computes and returns the R^2 (coefficient of determination)
+ */
+template <class It>
+nvbench::float64_t
+compute_r2(It first, It last, nvbench::float64_t slope, nvbench::float64_t intercept)
+{
+  return compute_r2(first, last, compute_mean(first, last), slope, intercept);
+}
+
+inline nvbench::float64_t rad2deg(nvbench::float64_t rad)
+{
+  return rad * 180.0 / 3.14159265358979323846; // literal pi: M_PI is not standard C++
+}
+
+inline nvbench::float64_t slope2rad(nvbench::float64_t slope)
+{
+  return std::atan2(slope, 1.0);
+}
+
+inline nvbench::float64_t slope2deg(nvbench::float64_t slope)
+{
+  return rad2deg(slope2rad(slope));
+}
+
 } // namespace nvbench::detail::statistics
--- a/nvbench/detail/stdrel_criterion.cuh
+++ b/nvbench/detail/stdrel_criterion.cuh
@@ -0,0 +1,47 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#pragma once
+
+#include <nvbench/types.cuh>
+#include <nvbench/stopping_criterion.cuh>
+#include <nvbench/detail/ring_buffer.cuh>
+
+#include <vector>
+
+namespace nvbench::detail
+{
+
+class stdrel_criterion final : public stopping_criterion_base
+{
+  // state
+  nvbench::int64_t m_total_samples{};
+  nvbench::float64_t m_total_cuda_time{};
+  std::vector<nvbench::float64_t> m_cuda_times{};
+  nvbench::detail::ring_buffer<nvbench::float64_t> m_noise_tracker{512};
+
+public:
+  stdrel_criterion();
+
+protected:
+  virtual void do_initialize() override;
+  virtual void do_add_measurement(nvbench::float64_t measurement) override;
+  virtual bool do_is_finished() override;
+};
+
+} // namespace nvbench::detail
--- a/nvbench/detail/stdrel_criterion.cxx
+++ b/nvbench/detail/stdrel_criterion.cxx
@@ -0,0 +1,98 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <nvbench/detail/stdrel_criterion.cuh>
+
+namespace nvbench::detail
+{
+
+stdrel_criterion::stdrel_criterion()
+    : stopping_criterion_base{"stdrel",
+                              {{"max-noise", nvbench::detail::compat_max_noise()},
+                               {"min-time", nvbench::detail::compat_min_time()}}}
+{}
+
+void stdrel_criterion::do_initialize()
+{
+  m_total_samples = 0;
+  m_total_cuda_time = 0.0;
+  m_cuda_times.clear();
+  m_noise_tracker.clear();
+}
+
+void stdrel_criterion::do_add_measurement(nvbench::float64_t measurement)
+{
+  m_total_samples++;
+  m_total_cuda_time += measurement;
+  m_cuda_times.push_back(measurement);
+
+  // Compute convergence statistics using CUDA timings:
+  const auto mean_cuda_time = m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples);
+  const auto cuda_stdev     = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
+                                                                          m_cuda_times.cend(),
+                                                                          mean_cuda_time);
+  const auto cuda_rel_stdev       = cuda_stdev / mean_cuda_time;
+  if (std::isfinite(cuda_rel_stdev))
+  {
+    m_noise_tracker.push_back(cuda_rel_stdev);
+  }
+}
+
+bool stdrel_criterion::do_is_finished()
+{
+  // The tracker only receives finite noise values, so it can still be empty here;
+  // `back()` must not be consulted until at least one value has been recorded.
+  if (m_total_cuda_time <= m_params.get_float64("min-time") || m_noise_tracker.size() == 0)
+  {
+    return false;
+  }
+
+  // Noise has dropped below threshold
+  if (m_noise_tracker.back() < m_params.get_float64("max-noise"))
+  {
+    return true;
+  }
+
+  // Check if the noise (cuda rel stdev) has converged by inspecting a trailing
+  // window of recorded noise measurements. This ends benchmarks that are
+  // inherently noisy and whose stdev stabilizes above the target threshold.
+  // Gather some iterations before checking noise, and limit how often we
+  // check this.
+  if (m_noise_tracker.size() > 64 && (m_total_samples % 16 == 0))
+  {
+    // Use the current noise as the stdev reference.
+    const auto current_noise = m_noise_tracker.back();
+    const auto noise_stdev =
+      nvbench::detail::statistics::standard_deviation(m_noise_tracker.cbegin(),
+                                                      m_noise_tracker.cend(),
+                                                      current_noise);
+    const auto noise_rel_stdev = noise_stdev / current_noise;
+
+    // If the rel stdev of the last N cuda noise measurements is less than
+    // 5%, consider the result stable.
+    const auto noise_threshold = 0.05;
+    if (noise_rel_stdev < noise_threshold)
+    {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+} // namespace nvbench::detail
--- a/nvbench/nvbench.cuh
+++ b/nvbench/nvbench.cuh
@@ -24,6 +24,7 @@
 #include <nvbench/callable.cuh>
 #include <nvbench/config.cuh>
 #include <nvbench/cpu_timer.cuh>
+#include <nvbench/criterion_manager.cuh>
 #include <nvbench/create.cuh>
 #include <nvbench/cuda_call.cuh>
 #include <nvbench/cuda_stream.cuh>
--- a/nvbench/option_parser.cu
+++ b/nvbench/option_parser.cu
@@ -21,6 +21,7 @@
 #include <nvbench/benchmark_base.cuh>
 #include <nvbench/benchmark_manager.cuh>
 #include <nvbench/csv_printer.cuh>
+#include <nvbench/criterion_manager.cuh>
 #include <nvbench/git_revision.cuh>
 #include <nvbench/json_printer.cuh>
 #include <nvbench/markdown_printer.cuh>
@@ -376,6 +377,9 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
    }
  };

+  const nvbench::criterion_manager::params_description criterion_params =
+    nvbench::criterion_manager::get().get_params_description();
+
  while (first < last)
  {
    const auto &arg = *first;
@@ -433,6 +437,12 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
      this->enable_run_once();
      first += 1;
    }
+    else if (arg == "--stopping-criterion")
+    {
+      check_params(1);
+      this->set_stopping_criterion(first[1]);
+      first += 2;
+    }
    else if (arg == "--disable-blocking-kernel")
    {
      this->disable_blocking_kernel();
@@ -504,16 +514,34 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
      this->update_int64_prop(first[0], first[1]);
      first += 2;
    }
-    else if (arg == "--min-time" || arg == "--max-noise" || arg == "--skip-time" ||
-             arg == "--timeout")
+    else if (arg == "--skip-time" || arg == "--timeout")
    {
      check_params(1);
      this->update_float64_prop(first[0], first[1]);
      first += 2;
    }
    else
-    {
-      NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg);
+    { // Try criterion params
+      if (arg.size() < 3 || arg[0] != '-' || arg[1] != '-')
+      {
+        NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg);
+      }
+
+      std::string_view name(arg.c_str() + 2, arg.size() - 2);
+      auto it = std::find_if(criterion_params.begin(),
+                             criterion_params.end(),
+                             [&name](const auto &param) { return param.first == name; });
+
+      if (it != criterion_params.end())
+      {
+        check_params(1);
+        this->update_criterion_prop(first[0], first[1], it->second);
+        first += 2;
+      }
+      else
+      {
+        NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg);
+      }
    }
  }
 }
@@ -698,6 +726,20 @@ void option_parser::enable_run_once()
  bench.set_run_once(true);
 }

+void option_parser::set_stopping_criterion(const std::string &criterion)
+{
+  // If no active benchmark, save args as global.
+  if (m_benchmarks.empty())
+  {
+    m_global_benchmark_args.push_back("--stopping-criterion");
+    m_global_benchmark_args.push_back(criterion);
+    return;
+  }
+
+  benchmark_base &bench = *m_benchmarks.back();
+  bench.set_stopping_criterion(criterion);
+}
+
 void option_parser::disable_blocking_kernel()
 {
  // If no active benchmark, save args as global.
@@ -933,6 +975,58 @@ catch (std::exception &e)
                e.what());
 }

+void option_parser::update_criterion_prop(
+  const std::string &prop_arg,
+  const std::string &prop_val,
+  const nvbench::named_values::type type)
+try 
+{
+  // If no active benchmark, save args as global.
+  if (m_benchmarks.empty())
+  {
+    m_global_benchmark_args.push_back(prop_arg);
+    m_global_benchmark_args.push_back(prop_val);
+    return;
+  }
+
+  benchmark_base &bench = *m_benchmarks.back();
+  nvbench::criterion_params& criterion_params = bench.get_criterion_params();
+  std::string name(prop_arg.begin() + 2, prop_arg.end());
+  if (type == nvbench::named_values::type::float64) 
+  {
+    nvbench::float64_t value{};
+    ::parse(prop_val, value);
+
+    if (prop_arg == "--max-noise")
+    { // Specified as percentage, stored as ratio:
+      value /= 100.0;
+    }
+    criterion_params.set_float64(name, value);
+  }
+  else if (type == nvbench::named_values::type::int64) 
+  {
+    nvbench::int64_t value{};
+    ::parse(prop_val, value);
+    criterion_params.set_int64(name, value);
+  }
+  else if (type == nvbench::named_values::type::string) 
+  {
+    criterion_params.set_string(name, prop_val);
+  }
+  else 
+  {
+    NVBENCH_THROW(std::runtime_error, "Unrecognized property: `{}`", prop_arg);
+  }
+}
+catch (std::exception& e)
+{
+  NVBENCH_THROW(std::runtime_error,
+                "Error handling option `{} {}`:\n{}",
+                prop_arg,
+                prop_val,
+                e.what());
+}
+
 void option_parser::update_float64_prop(const std::string &prop_arg, const std::string &prop_val)
 try
 {
@@ -948,15 +1042,7 @@ try

  nvbench::float64_t value{};
  ::parse(prop_val, value);
-  if (prop_arg == "--min-time")
-  {
-    bench.set_min_time(value);
-  }
-  else if (prop_arg == "--max-noise")
-  { // Specified as percentage, stored as ratio:
-    bench.set_max_noise(value / 100.);
-  }
-  else if (prop_arg == "--skip-time")
+  if (prop_arg == "--skip-time")
  {
    bench.set_skip_time(value);
  }
--- a/nvbench/option_parser.cuh
+++ b/nvbench/option_parser.cuh
@@ -20,6 +20,7 @@

 #include <nvbench/device_info.cuh>
 #include <nvbench/printer_multiplex.cuh>
+#include <nvbench/stopping_criterion.cuh>

 #include <iosfwd>
 #include <memory>
@@ -86,6 +87,7 @@ private:
  void set_persistence_mode(const std::string &state);
  void lock_gpu_clocks(const std::string &rate);

+  void set_stopping_criterion(const std::string &criterion);
  void enable_run_once();
  void disable_blocking_kernel();

@@ -111,6 +113,10 @@ private:
  void update_int64_prop(const std::string &prop_arg, const std::string &prop_val);
  void update_float64_prop(const std::string &prop_arg, const std::string &prop_val);

+  void update_criterion_prop(const std::string &prop_arg,
+                             const std::string &prop_val,
+                             const nvbench::named_values::type type);
+
  void update_used_device_state() const;

  // Command line args
--- a/nvbench/state.cuh
+++ b/nvbench/state.cuh
@@ -24,6 +24,7 @@
 #include <nvbench/named_values.cuh>
 #include <nvbench/summary.cuh>
 #include <nvbench/types.cuh>
+#include <nvbench/stopping_criterion.cuh>

 #include <functional>
 #include <optional>
@@ -122,6 +123,17 @@ struct state
  void set_min_samples(nvbench::int64_t min_samples) { m_min_samples = min_samples; }
  /// @}

+  [[nodiscard]] const nvbench::criterion_params &get_criterion_params() const
+  {
+    return m_criterion_params;
+  }
+
+  /// Control the stopping criterion for the measurement loop.
+  /// @{
+  [[nodiscard]] const std::string& get_stopping_criterion() const { return m_stopping_criterion; }
+  void set_stopping_criterion(std::string criterion) { m_stopping_criterion = std::move(criterion); }
+  /// @}
+
  /// If true, the benchmark is only run once, skipping all warmup runs and only
  /// executing a single non-batched measurement. This is intended for use with
  /// external profiling tools. @{
@@ -135,16 +147,30 @@ struct state
  void set_disable_blocking_kernel(bool v) { m_disable_blocking_kernel = v; }
  /// @}

-  /// Accumulate at least this many seconds of timing data per measurement. @{
-  [[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; }
-  void set_min_time(nvbench::float64_t min_time) { m_min_time = min_time; }
+  /// Accumulate at least this many seconds of timing data per measurement. 
+  /// Only applies to `stdrel` stopping criterion. @{
+  [[nodiscard]] nvbench::float64_t get_min_time() const
+  {
+    return m_criterion_params.get_float64("min-time");
+  }
+  void set_min_time(nvbench::float64_t min_time)
+  {
+    m_criterion_params.set_float64("min-time", min_time);
+  }
  /// @}

  /// Specify the maximum amount of noise if a measurement supports noise.
  /// Noise is the relative standard deviation:
-  /// `noise = stdev / mean_time`. @{
-  [[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; }
-  void set_max_noise(nvbench::float64_t max_noise) { m_max_noise = max_noise; }
+  /// `noise = stdev / mean_time`.
+  /// Only applies to `stdrel` stopping criterion. @{
+  [[nodiscard]] nvbench::float64_t get_max_noise() const
+  {
+    return m_criterion_params.get_float64("max-noise");
+  }
+  void set_max_noise(nvbench::float64_t max_noise)
+  {
+    m_criterion_params.set_float64("max-noise", max_noise);
+  }
  /// @}

  /// If a warmup run finishes in less than `skip_time`, the measurement will
@@ -270,9 +296,11 @@ private:
  bool m_run_once{false};
  bool m_disable_blocking_kernel{false};

+
+  nvbench::criterion_params m_criterion_params;
+  std::string m_stopping_criterion;
+
  nvbench::int64_t m_min_samples;
-  nvbench::float64_t m_min_time;
-  nvbench::float64_t m_max_noise;

  nvbench::float64_t m_skip_time;
  nvbench::float64_t m_timeout;
--- a/nvbench/state.cxx
+++ b/nvbench/state.cxx
@@ -36,9 +36,9 @@ state::state(const benchmark_base &bench)
    : m_benchmark{bench}
    , m_run_once{bench.get_run_once()}
    , m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
+    , m_criterion_params{bench.get_criterion_params()}
+    , m_stopping_criterion(bench.get_stopping_criterion())
    , m_min_samples{bench.get_min_samples()}
-    , m_min_time{bench.get_min_time()}
-    , m_max_noise{bench.get_max_noise()}
    , m_skip_time{bench.get_skip_time()}
    , m_timeout{bench.get_timeout()}
 {}
@@ -53,9 +53,9 @@ state::state(const benchmark_base &bench,
    , m_type_config_index{type_config_index}
    , m_run_once{bench.get_run_once()}
    , m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
+    , m_criterion_params{bench.get_criterion_params()}
+    , m_stopping_criterion(bench.get_stopping_criterion())
    , m_min_samples{bench.get_min_samples()}
-    , m_min_time{bench.get_min_time()}
-    , m_max_noise{bench.get_max_noise()}
    , m_skip_time{bench.get_skip_time()}
    , m_timeout{bench.get_timeout()}
 {}
--- a/nvbench/stopping_criterion.cuh
+++ b/nvbench/stopping_criterion.cuh
@@ -0,0 +1,136 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#pragma once
+
+#include <nvbench/named_values.cuh>
+#include <nvbench/types.cuh>
+
+#include <string>
+
+#include <initializer_list>
+#include <unordered_map>
+
+namespace nvbench
+{
+
+namespace detail 
+{
+
+constexpr nvbench::float64_t compat_min_time() { return 0.5; }    // 0.5 seconds
+constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relative standard deviation
+
+} // namespace detail
+
+/**
+ * Stores all the parameters for stopping criterion in use
+ */
+class criterion_params
+{
+  nvbench::named_values m_named_values;
+public:
+  criterion_params();
+  criterion_params(std::initializer_list<std::pair<std::string, nvbench::named_values::value_type>>);
+
+  /**
+   * Set parameter values from another criterion_params object if they exist
+   *
+   * Parameters in `other` that do not correspond to parameters in `this` are ignored.
+   */
+  void set_from(const criterion_params &other);
+
+  void set_int64(std::string name, nvbench::int64_t value);
+  void set_float64(std::string name, nvbench::float64_t value);
+  void set_string(std::string name, std::string value);
+
+  [[nodiscard]] std::vector<std::string> get_names() const;
+  [[nodiscard]] nvbench::named_values::type get_type(const std::string &name) const;
+
+  [[nodiscard]] bool has_value(const std::string &name) const;
+  [[nodiscard]] nvbench::int64_t get_int64(const std::string &name) const;
+  [[nodiscard]] nvbench::float64_t get_float64(const std::string &name) const;
+  [[nodiscard]] std::string get_string(const std::string &name) const;
+};
+
+/**
+ * Stopping criterion interface
+ */
+class stopping_criterion_base
+{
+protected:
+  std::string m_name;
+  criterion_params m_params;
+
+public:
+  /**
+   * @param name Unique name of the criterion
+   * @param params Default values for all parameters of the criterion
+   */
+  explicit stopping_criterion_base(std::string name, criterion_params params)
+      : m_name{std::move(name)}
+      , m_params{std::move(params)}
+  {}
+
+  // Polymorphic base: keep destruction through a base-class pointer well-defined.
+  virtual ~stopping_criterion_base() = default;
+
+  [[nodiscard]] const std::string &get_name() const { return m_name; }
+  [[nodiscard]] const criterion_params &get_params() const { return m_params; }
+
+  /**
+   * Initialize the criterion with the given parameters
+   *
+   * This method is called once per benchmark run, before any measurements are provided.
+   */
+  void initialize(const criterion_params &params)
+  {
+    m_params.set_from(params);
+    this->do_initialize();
+  }
+
+  /**
+   * Add the latest measurement to the criterion
+   */
+  void add_measurement(nvbench::float64_t measurement)
+  {
+    this->do_add_measurement(measurement);
+  }
+
+  /**
+   * Check if the criterion has been met for all measurements processed by `add_measurement`
+   */
+  bool is_finished() { return this->do_is_finished(); }
+
+protected:
+  /**
+   * Initialize the criterion after updating the parameters
+   */
+  virtual void do_initialize() = 0;
+
+  /**
+   * Add the latest measurement to the criterion
+   */
+  virtual void do_add_measurement(nvbench::float64_t measurement) = 0;
+
+  /**
+   * Check if the criterion has been met for all measurements processed by `add_measurement`
+   */
+  virtual bool do_is_finished() = 0;
+};
+
+} // namespace nvbench
--- a/nvbench/stopping_criterion.cxx
+++ b/nvbench/stopping_criterion.cxx
@@ -0,0 +1,124 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <nvbench/stopping_criterion.cuh>
+
+#include <nvbench/detail/throw.cuh>
+
+
+namespace nvbench
+{
+
+// Default constructor for compatibility with old code
+criterion_params::criterion_params()
+    : criterion_params{{"max-noise", nvbench::detail::compat_max_noise()},
+                       {"min-time", nvbench::detail::compat_min_time()}}
+{}
+
+criterion_params::criterion_params(
+  std::initializer_list<std::pair<std::string, nvbench::named_values::value_type>> list)
+{
+  for (const auto &[name, value] : list)
+  {
+    m_named_values.set_value(name, value);
+  }
+}
+
+void criterion_params::set_from(const criterion_params &other)
+{
+  for (const std::string &name : this->get_names())
+  {
+    if (other.has_value(name))
+    {
+      if (this->get_type(name) != other.get_type(name))
+      {
+        NVBENCH_THROW(std::runtime_error,
+                      "Mismatched types for named value \"{}\". "
+                      "Expected {}, got {}.",
+                      name,
+                      static_cast<int>(this->get_type(name)),
+                      static_cast<int>(other.get_type(name)));
+      }
+      m_named_values.remove_value(name);
+      m_named_values.set_value(name, other.m_named_values.get_value(name));
+    }
+  }
+}
+
+void criterion_params::set_int64(std::string name, nvbench::int64_t value)
+{
+  if (m_named_values.has_value(name)) 
+  {
+    m_named_values.remove_value(name);
+  }
+
+  m_named_values.set_int64(name, value);
+}
+
+void criterion_params::set_float64(std::string name, nvbench::float64_t value)
+{
+  if (m_named_values.has_value(name)) 
+  {
+    m_named_values.remove_value(name);
+  }
+
+  m_named_values.set_float64(name, value);
+}
+
+void criterion_params::set_string(std::string name, std::string value)
+{
+  if (m_named_values.has_value(name)) 
+  {
+    m_named_values.remove_value(name);
+  }
+
+  m_named_values.set_string(name, std::move(value));
+}
+
+bool criterion_params::has_value(const std::string &name) const
+{
+  return m_named_values.has_value(name);
+}
+
+nvbench::int64_t criterion_params::get_int64(const std::string &name) const
+{
+  return m_named_values.get_int64(name);
+}
+
+nvbench::float64_t criterion_params::get_float64(const std::string &name) const
+{
+  return m_named_values.get_float64(name);
+}
+
+std::string criterion_params::get_string(const std::string &name) const
+{
+  return m_named_values.get_string(name);
+}
+
+std::vector<std::string> criterion_params::get_names() const
+{
+  return m_named_values.get_names();
+}
+
+nvbench::named_values::type criterion_params::get_type(const std::string &name) const
+{
+  return m_named_values.get_type(name);
+}
+
+
+} // namespace nvbench
--- a/testing/CMakeLists.txt
+++ b/testing/CMakeLists.txt
@@ -4,7 +4,10 @@ set(test_srcs
  create.cu
  cuda_timer.cu
  cpu_timer.cu
+  criterion_manager.cu
+  criterion_params.cu
  enum_type_list.cu
+  entropy_criterion.cu
  float64_axis.cu
  int64_axis.cu
  named_values.cu
@@ -13,7 +16,9 @@ set(test_srcs
  ring_buffer.cu
  runner.cu
  state.cu
+  statistics.cu
  state_generator.cu
+  stdrel_criterion.cu
  string_axis.cu
  type_axis.cu
  type_list.cu
--- a/testing/criterion_manager.cu
+++ b/testing/criterion_manager.cu
@@ -0,0 +1,76 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <nvbench/criterion_manager.cuh>
+#include <nvbench/types.cuh>
+
+#include "test_asserts.cuh"
+
+void test_standard_criteria_exist()
+{
+  ASSERT(nvbench::criterion_manager::get().get_criterion("stdrel").get_name() == "stdrel");
+  ASSERT(nvbench::criterion_manager::get().get_criterion("entropy").get_name() == "entropy");
+}
+
+class custom_criterion : public nvbench::stopping_criterion_base
+{
+public:
+  custom_criterion()
+      : nvbench::stopping_criterion_base("custom", nvbench::criterion_params{})
+  {}
+
+protected:
+  virtual void do_initialize() override {}
+  virtual void do_add_measurement(nvbench::float64_t /* measurement */) override {}
+  virtual bool do_is_finished() override { return true; }
+};
+
+void test_no_duplicates_are_allowed()
+{
+  nvbench::criterion_manager& manager = nvbench::criterion_manager::get();
+  bool exception_triggered = false;
+
+  try {
+    nvbench::stopping_criterion_base& custom = manager.get_criterion("custom");
+  } catch(...) {
+    exception_triggered = true;
+  }
+  ASSERT(exception_triggered);
+
+  std::unique_ptr<custom_criterion> custom_ptr = std::make_unique<custom_criterion>();
+  custom_criterion* custom_raw = custom_ptr.get();
+  ASSERT(&manager.add(std::move(custom_ptr)) == custom_raw);
+
+  nvbench::stopping_criterion_base& custom = nvbench::criterion_manager::get().get_criterion("custom");
+  ASSERT(custom_raw == &custom);
+
+  exception_triggered = false;
+  try {
+    manager.add(std::make_unique<custom_criterion>());
+  } catch(...) {
+    exception_triggered = true;
+  }
+  ASSERT(exception_triggered);
+}
+
+int main()
+{
+  test_standard_criteria_exist();
+  test_no_duplicates_are_allowed();
+}
+
--- a/testing/criterion_params.cu
+++ b/testing/criterion_params.cu
@@ -0,0 +1,63 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <nvbench/criterion_manager.cuh>
+#include <nvbench/types.cuh>
+
+#include "test_asserts.cuh"
+
+// A default-constructed criterion_params is expected to already contain
+// "max-noise" and "min-time", holding the values returned by the
+// nvbench::detail::compat_* helpers.
+void test_compat_parameters()
+{
+  nvbench::criterion_params defaults;
+
+  // Presence is checked before the values are read.
+  ASSERT(defaults.has_value("max-noise"));
+  ASSERT(defaults.has_value("min-time"));
+
+  const auto max_noise = defaults.get_float64("max-noise");
+  const auto min_time  = defaults.get_float64("min-time");
+
+  ASSERT(max_noise == nvbench::detail::compat_max_noise());
+  ASSERT(min_time == nvbench::detail::compat_min_time());
+}
+
+// Values written with set_float64 for "max-noise" and "min-time" must be
+// exactly what get_float64 returns afterwards.
+void test_compat_overwrite()
+{
+  const nvbench::float64_t new_max_noise = 40000.0;
+  const nvbench::float64_t new_min_time  = 42000.0;
+
+  nvbench::criterion_params params;
+  params.set_float64("max-noise", new_max_noise);
+  params.set_float64("min-time", new_min_time);
+
+  ASSERT(params.get_float64("max-noise") == new_max_noise);
+  ASSERT(params.get_float64("min-time") == new_min_time);
+}
+
+// A key that is absent from a fresh criterion_params can be set, read back,
+// and then set again with the second value replacing the first.
+void test_overwrite()
+{
+  const nvbench::float64_t first_value  = 42.0;
+  const nvbench::float64_t second_value = 4.2;
+
+  nvbench::criterion_params params;
+  ASSERT(!params.has_value("custom"));
+
+  params.set_float64("custom", first_value);
+  ASSERT(params.get_float64("custom") == first_value);
+
+  params.set_float64("custom", second_value);
+  ASSERT(params.get_float64("custom") == second_value);
+}
+
+// Test driver: runs the criterion_params checks in sequence.
+int main()
+{
+  test_compat_parameters();
+  test_compat_overwrite();
+  test_overwrite();
+  return 0;
+}
+
--- a/testing/entropy_criterion.cu
+++ b/testing/entropy_criterion.cu
@@ -0,0 +1,91 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <nvbench/detail/entropy_criterion.cuh>
+#include <nvbench/stopping_criterion.cuh>
+#include <nvbench/types.cuh>
+
+#include "test_asserts.cuh"
+
+#include <vector>
+#include <random>
+#include <numeric>
+
+// Feeds six identical measurements (42.0) to an entropy criterion initialized
+// with default parameters and expects it to report completion.
+void test_const()
+{
+  nvbench::criterion_params params;
+  nvbench::detail::entropy_criterion criterion;
+  criterion.initialize(params);
+
+  // NOTE(review): the original comment here said "nvbench wants at least 5 to
+  // compute the standard deviation", yet six samples are fed -- confirm the
+  // minimum sample count the entropy criterion actually needs.
+  int remaining = 6;
+  while (remaining-- > 0)
+  {
+    criterion.add_measurement(42.0);
+  }
+
+  ASSERT(criterion.is_finished());
+}
+
+// Feeds 50 measurements into `criterion`: 0, 1, 2, 3, 4, 5, and then 5
+// repeated for every remaining sample.
+void produce_entropy_arch(nvbench::detail::entropy_criterion &criterion)
+{
+  /*
+   * According to the original author's note, this input is meant to make the
+   * entropy rise and then fall (an "arch"):
+   *
+   *   0.0, 1.0, 1.5, 2.0, 2.3, 2.5 <---- no unexpected measurement after this point
+   *   2.5, 2.4, 2.2, 2.1, 2.0, 1.9 <-+
+   *   1.8, 1.7, 1.6, 1.6, 1.5, 1.4   |
+   *   1.4, 1.3, 1.3, 1.3, 1.2, 1.2   |
+   *   1.1, 1.1, 1.1, 1.0, 1.0, 1.0   +-- entropy only decreases after the 5th sample,
+   *   1.0, 0.9, 0.9, 0.9, 0.9, 0.9   |   so the slope should be negative
+   *   0.8, 0.8, 0.8, 0.8, 0.8, 0.8   |
+   *   0.7, 0.7, 0.7, 0.7, 0.7, 0.7 <-+
+   */
+  for (int sample = 0; sample < 50; ++sample)
+  {
+    // Ramp up to 5, then hold the value constant.
+    const int clamped = sample > 5 ? 5 : sample;
+    criterion.add_measurement(static_cast<nvbench::float64_t>(clamped));
+  }
+}
+
+// Runs the "arch" input through the entropy criterion under three parameter
+// sets and checks whether it reports completion:
+//   min-r2 = 0.3, max-angle = -1.0  -> finished
+//   min-r2 = 0.7, max-angle = -1.0  -> not finished
+//   min-r2 = 0.3, max-angle = -2.0  -> not finished
+void test_entropy_arch()
+{
+  nvbench::detail::entropy_criterion criterion;
+  nvbench::criterion_params params;
+
+  // Re-initializes the criterion with the current params, replays the input,
+  // and reports the criterion's verdict.
+  const auto replay = [&criterion, &params]() {
+    criterion.initialize(params);
+    produce_entropy_arch(criterion);
+    return criterion.is_finished();
+  };
+
+  // Per the original note: the R2 should be around 0.5 and the angle around
+  // -1.83 for this input.
+  params.set_float64("min-r2", 0.3);
+  params.set_float64("max-angle", -1.0);
+  ASSERT(replay());
+
+  // Raise the R2 requirement only.
+  params.set_float64("min-r2", 0.7);
+  ASSERT(!replay());
+
+  // Restore the R2 requirement and lower the angle limit instead.
+  params.set_float64("min-r2", 0.3);
+  params.set_float64("max-angle", -2.0);
+  ASSERT(!replay());
+}
+
+// Test driver: runs the entropy-criterion checks in sequence.
+int main()
+{
+  test_const();
+  test_entropy_arch();
+  return 0;
+}
--- a/testing/option_parser.cu
+++ b/testing/option_parser.cu
@@ -1229,6 +1229,27 @@ void test_timeout()
  ASSERT(std::abs(states[0].get_timeout() - 12345e2) < 1.);
 }

+// Parses a command line that selects the "entropy" stopping criterion with
+// explicit --max-angle and --min-r2 values, then checks that the single
+// resulting state reports that criterion name and both parameter values.
+void test_stopping_criterion()
+{
+  nvbench::option_parser parser;
+  parser.parse({"--benchmark", "DummyBench",
+                "--stopping-criterion", "entropy",
+                "--max-angle", "0.42",
+                "--min-r2", "0.6"});
+
+  const auto &states = parser_to_states(parser);
+  ASSERT(states.size() == 1);
+
+  const auto &state = states[0];
+  ASSERT(state.get_stopping_criterion() == "entropy");
+
+  const nvbench::criterion_params &criterion_params = state.get_criterion_params();
+
+  // Both options must be present before their values are compared.
+  ASSERT(criterion_params.has_value("max-angle"));
+  ASSERT(criterion_params.has_value("min-r2"));
+
+  ASSERT(criterion_params.get_float64("max-angle") == 0.42);
+  ASSERT(criterion_params.get_float64("min-r2") == 0.6);
+}
+
 int main()
 try
 {
@@ -1265,6 +1286,8 @@ try
  test_skip_time();
  test_timeout();

+  test_stopping_criterion();
+
  return 0;
 }
 catch (std::exception &err)
--- a/testing/ring_buffer.cu
+++ b/testing/ring_buffer.cu
@@ -27,7 +27,7 @@ template <typename T>
 bool equal(const nvbench::detail::ring_buffer<T> &buffer,
           const std::vector<T> &reference)
 {
-  return std::equal(buffer.cbegin(), buffer.cend(), reference.cbegin());
+  return std::equal(buffer.begin(), buffer.end(), reference.begin());
 }

 int main()
@@ -62,12 +62,12 @@ try
  ASSERT(avg.size() == 3);
  ASSERT(avg.capacity() == 3);
  ASSERT_MSG(avg.back() == 5, " (got {})", avg.back());
-  ASSERT(equal(avg, {5, 2, -15}));
+  ASSERT(equal(avg, {2, -15, 5}));

  avg.push_back(0);
  ASSERT(avg.size() == 3);
  ASSERT(avg.capacity() == 3);
-  ASSERT(equal(avg, {5, 0, -15}));
+  ASSERT(equal(avg, {-15, 5, 0}));
  ASSERT_MSG(avg.back() == 0, " (got {})", avg.back());

  avg.push_back(128);
--- a/testing/statistics.cu
+++ b/testing/statistics.cu
@@ -0,0 +1,128 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <nvbench/detail/statistics.cuh>
+#include <nvbench/types.cuh>
+
+#include "test_asserts.cuh"
+
+#include <algorithm>
+#include <cmath>
+#include <functional>
+#include <vector>
+
+namespace statistics = nvbench::detail::statistics;
+
+// Checks compute_mean on a small known data set and on an empty range.
+void test_mean()
+{
+  // 1..5 averages to 3 (within 0.001).
+  {
+    std::vector<nvbench::float64_t> samples{1.0, 2.0, 3.0, 4.0, 5.0};
+    const nvbench::float64_t mean = statistics::compute_mean(samples.begin(), samples.end());
+    ASSERT(std::abs(mean - 3.0) < 0.001);
+  }
+
+  // An empty range is expected to yield a non-finite result.
+  {
+    std::vector<nvbench::float64_t> nothing;
+    const nvbench::float64_t mean = statistics::compute_mean(nothing.begin(), nothing.end());
+    ASSERT(!std::isfinite(mean));
+  }
+}
+
+// Checks standard_deviation on 1..5 with the mean (3.0) supplied by the
+// caller; the result must be within 0.001 of 1.581.
+void test_std()
+{
+  const nvbench::float64_t tolerance  = 0.001;
+  const nvbench::float64_t known_mean = 3.0;
+
+  std::vector<nvbench::float64_t> samples{1.0, 2.0, 3.0, 4.0, 5.0};
+  const nvbench::float64_t stdev =
+    statistics::standard_deviation(samples.begin(), samples.end(), known_mean);
+
+  ASSERT(std::abs(stdev - 1.581) < tolerance);
+}
+
+// Checks compute_linear_regression on three small series: a rising line, a
+// constant series, and a falling line.
+//
+// Results are compared with the same 0.001 tolerance the other tests in this
+// file use, rather than with exact floating-point equality, so the test does
+// not depend on the regression's internal order of arithmetic.
+void test_lin_regression()
+{
+  const nvbench::float64_t tolerance = 0.001;
+
+  // Rising: y = 1 + x
+  {
+    std::vector<nvbench::float64_t> ys{1.0, 2.0, 3.0, 4.0, 5.0};
+    auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
+    ASSERT(std::abs(slope - 1.0) < tolerance);
+    ASSERT(std::abs(intercept - 1.0) < tolerance);
+  }
+  // Constant: y = 42
+  {
+    std::vector<nvbench::float64_t> ys{42.0, 42.0, 42.0};
+    auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
+    ASSERT(std::abs(slope - 0.0) < tolerance);
+    ASSERT(std::abs(intercept - 42.0) < tolerance);
+  }
+  // Falling: y = 8 - 4x
+  {
+    std::vector<nvbench::float64_t> ys{8.0, 4.0, 0.0};
+    auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys));
+    ASSERT(std::abs(slope - (-4.0)) < tolerance);
+    ASSERT(std::abs(intercept - 8.0) < tolerance);
+  }
+}
+
+// Checks compute_r2 against the fit returned by compute_linear_regression,
+// first for perfectly linear data and then for the same ramp with an
+// alternating -1/+1 offset added.
+void test_r2()
+{
+  const nvbench::float64_t tolerance = 0.001;
+
+  // Perfectly linear data: R2 is expected to be 1.
+  {
+    std::vector<nvbench::float64_t> ys{1.0, 2.0, 3.0, 4.0, 5.0};
+    auto [slope, intercept] = statistics::compute_linear_regression(ys.begin(), ys.end());
+    const nvbench::float64_t r2 = statistics::compute_r2(ys.begin(), ys.end(), slope, intercept);
+    ASSERT(std::abs(r2 - 1.0) < tolerance);
+  }
+
+  // Ramp plus alternating offset: R2 is expected to be about 0.675.
+  {
+    const std::vector<nvbench::float64_t> signal{1.0, 2.0, 3.0, 4.0, 5.0};
+    const std::vector<nvbench::float64_t> noise{-1.0, 1.0, -1.0, 1.0, -1.0};
+
+    // Element-wise sum of the two series.
+    std::vector<nvbench::float64_t> ys(signal.size());
+    for (std::size_t i = 0; i < signal.size(); ++i)
+    {
+      ys[i] = signal[i] + noise[i];
+    }
+
+    auto [slope, intercept] = statistics::compute_linear_regression(ys.begin(), ys.end());
+    const nvbench::float64_t r2 = statistics::compute_r2(ys.begin(), ys.end(), slope, intercept);
+    ASSERT(std::abs(r2 - 0.675) < tolerance);
+  }
+}
+
+// Checks slope2deg for slopes 0, 1 and 5 against 0, 45 and 78.69 degrees,
+// each within 0.001.
+void test_slope_conversion()
+{
+  const nvbench::float64_t tolerance = 0.001;
+
+  // One assertion per line so a failure identifies the offending input.
+  ASSERT(std::abs(statistics::slope2deg(0.0) - 0.0) < tolerance);
+  ASSERT(std::abs(statistics::slope2deg(1.0) - 45.0) < tolerance);
+  ASSERT(std::abs(statistics::slope2deg(5.0) - 78.69) < tolerance);
+}
+
+// Test driver: runs the statistics checks in sequence.
+int main()
+{
+  test_mean();
+  test_std();
+  test_lin_regression();
+  test_r2();
+  test_slope_conversion();
+  return 0;
+}
--- a/testing/stdrel_criterion.cu
+++ b/testing/stdrel_criterion.cu
@@ -0,0 +1,84 @@
+/*
+ *  Copyright 2023 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <nvbench/detail/stdrel_criterion.cuh>
+#include <nvbench/stopping_criterion.cuh>
+#include <nvbench/types.cuh>
+
+#include "test_asserts.cuh"
+
+#include <vector>
+#include <random>
+#include <numeric>
+
+// Feeds five identical measurements (42.0) to a stdrel criterion initialized
+// with default parameters and expects it to report completion.
+void test_const()
+{
+  nvbench::criterion_params params;
+  nvbench::detail::stdrel_criterion criterion;
+  criterion.initialize(params);
+
+  // Per the original note: nvbench wants at least 5 samples to compute the
+  // standard deviation.
+  int remaining = 5;
+  while (remaining-- > 0)
+  {
+    criterion.add_measurement(42.0);
+  }
+
+  ASSERT(criterion.is_finished());
+}
+
+// Returns `size` samples drawn from a normal distribution centred on `mean`
+// with standard deviation `mean * rel_std_dev`.
+//
+// The engine is seeded with a fixed constant instead of std::random_device so
+// that repeated runs on the same toolchain see the same samples and a failing
+// run can be reproduced. Every call restarts from that seed, so the output
+// depends only on the arguments.
+//
+// NOTE(review): std::normal_distribution's algorithm is implementation-defined,
+// so the exact samples may still differ between standard libraries -- confirm
+// the callers' assertions hold with this seed on the supported toolchains.
+std::vector<double> generate(double mean, double rel_std_dev, int size)
+{
+  std::mt19937 gen(42); // fixed seed: reproducible test input
+  std::vector<nvbench::float64_t> v(size);
+  std::normal_distribution<nvbench::float64_t> dist(mean, mean * rel_std_dev);
+  std::generate(v.begin(), v.end(), [&]{ return dist(gen); });
+  return v;
+}
+
+// Initializes a stdrel criterion with "max-noise" = 0.1 and feeds it two
+// batches of ten generated samples around 42.0:
+//   - relative spread of half the limit  -> expected to be finished,
+//   - relative spread of twice the limit -> expected not to be finished.
+void test_stdrel()
+{
+  const nvbench::int64_t size = 10;
+  const nvbench::float64_t mean = 42.0;
+  const nvbench::float64_t max_noise = 0.1;
+
+  nvbench::criterion_params params;
+  nvbench::detail::stdrel_criterion criterion;
+
+  // Pushes one generated batch with the given relative spread.
+  const auto feed = [&](nvbench::float64_t rel_std_dev) {
+    const auto samples = generate(mean, rel_std_dev, size);
+    for (const nvbench::float64_t sample : samples)
+    {
+      criterion.add_measurement(sample);
+    }
+  };
+
+  // Quiet batch.
+  params.set_float64("max-noise", max_noise);
+  criterion.initialize(params);
+  feed(max_noise / 2);
+  ASSERT(criterion.is_finished());
+
+  // Noisy batch, after re-initializing with the same limit.
+  params.set_float64("max-noise", max_noise);
+  criterion.initialize(params);
+  feed(max_noise * 2);
+  ASSERT(!criterion.is_finished());
+}
+
+// Test driver: runs the stdrel-criterion checks in sequence.
+int main()
+{
+  test_const();
+  test_stdrel();
+  return 0;
+}