From d079a499768ce9dc3095ddde94d34f5e906cd2ec Mon Sep 17 00:00:00 2001
From: Georgy Evtushenko <evtushenko.georgy@gmail.com>
Date: Tue, 9 Jan 2024 14:57:29 -0800
Subject: [PATCH] Compute mean once

---
 nvbench/detail/entropy_criterion.cxx |  7 ++--
 nvbench/detail/statistics.cuh        | 51 +++++++++++++++++++++++-----
 2 files changed, 46 insertions(+), 12 deletions(-)
diff --git a/nvbench/detail/entropy_criterion.cxx b/nvbench/detail/entropy_criterion.cxx
index f420287..77c2071 100644
--- a/nvbench/detail/entropy_criterion.cxx
+++ b/nvbench/detail/entropy_criterion.cxx
@@ -126,16 +126,17 @@ bool entropy_criterion::is_finished()
   }
 
   auto begin = m_entropy_tracker.cbegin();
-  auto end = m_entropy_tracker.cend();
+  auto end   = m_entropy_tracker.cend();
+  auto mean  = statistics::compute_mean(begin, end);
 
-  const auto [slope, intercept] = statistics::compute_linear_regression(begin, end);
+  const auto [slope, intercept] = statistics::compute_linear_regression(begin, end, mean);
 
   if (statistics::slope2deg(slope) > m_max_angle) 
   {
     return false;
   }
 
-  const auto r2 = statistics::compute_r2(begin, end, slope, intercept);
+  const auto r2 = statistics::compute_r2(begin, end, mean, slope, intercept);
   if (r2 < m_min_r2)
   {
     return false;
diff --git a/nvbench/detail/statistics.cuh b/nvbench/detail/statistics.cuh
index 1fece39..358bb6c 100644
--- a/nvbench/detail/statistics.cuh
+++ b/nvbench/detail/statistics.cuh
@@ -62,13 +62,22 @@ ValueType standard_deviation(Iter first, Iter last, ValueType mean)
   return std::sqrt(variance);
 }
 
+template <class It>
+nvbench::float64_t compute_mean(It first, It last)
+{
+  const auto total = std::accumulate(first, last, 0.0); // 0.0 seed: the sum is carried in double
+  return total / static_cast<nvbench::float64_t>(std::distance(first, last));
+}
+
 /**
  * Computes linear regression and returns the slope and intercept
  *
+ * This version takes a precomputed mean of [first, last).
  * If the input has fewer than 2 samples, infinity is returned for both slope and intercept.
  */
 template <class It>
-std::pair<nvbench::float64_t, nvbench::float64_t> compute_linear_regression(It first, It last)
+std::pair<nvbench::float64_t, nvbench::float64_t>
+compute_linear_regression(It first, It last, nvbench::float64_t mean_y)
 {
   const std::size_t n = static_cast<std::size_t>(std::distance(first, last));
 
@@ -80,10 +89,9 @@ std::pair<nvbench::float64_t, nvbench::float64_t> compute_linear_regression(It f
 
   // Assuming x starts from 0
   const nvbench::float64_t mean_x = (static_cast<nvbench::float64_t>(n) - 1.0) / 2.0;
-  const nvbench::float64_t mean_y = std::accumulate(first, last, 0.0) / static_cast<nvbench::float64_t>(n);
 
   // Calculate the numerator and denominator for the slope
-  nvbench::float64_t numerator = 0.0;
+  nvbench::float64_t numerator   = 0.0;
   nvbench::float64_t denominator = 0.0;
 
   for (std::size_t i = 0; i < n; ++i, ++first)
@@ -94,22 +102,37 @@ std::pair<nvbench::float64_t, nvbench::float64_t> compute_linear_regression(It f
   }
 
   // Calculate the slope and intercept
-  const nvbench::float64_t slope = numerator / denominator;
+  const nvbench::float64_t slope     = numerator / denominator;
   const nvbench::float64_t intercept = mean_y - slope * mean_x;
 
   return std::make_pair(slope, intercept);
 }
 
 /**
- * Computes and returns the R^2 (coefficient of determination)
- */ 
+ * Computes the mean of [first, last), then returns the regression slope and intercept
+ *
+ * If the input has fewer than 2 samples, infinity is returned for both slope and intercept.
+ */
 template <class It>
-nvbench::float64_t compute_r2(It first, It last, nvbench::float64_t slope, nvbench::float64_t intercept)
+std::pair<nvbench::float64_t, nvbench::float64_t> compute_linear_regression(It first, It last)
+{
+  return compute_linear_regression(first, last, compute_mean(first, last));
+}
+
+/**
+ * Computes and returns the R^2 (coefficient of determination)
+ *
+ * This version takes a precomputed mean of [first, last).
+ */
+template <class It>
+nvbench::float64_t compute_r2(It first,
+                              It last,
+                              nvbench::float64_t mean_y,
+                              nvbench::float64_t slope,
+                              nvbench::float64_t intercept)
 {
   const std::size_t n = static_cast<std::size_t>(std::distance(first, last));
 
-  const nvbench::float64_t mean_y = std::accumulate(first, last, 0.0) / static_cast<nvbench::float64_t>(n);
-
   nvbench::float64_t ss_tot = 0.0;
   nvbench::float64_t ss_res = 0.0;
 
@@ -130,6 +153,16 @@ nvbench::float64_t compute_r2(It first, It last, nvbench::float64_t slope, nvben
   return 1.0 - ss_res / ss_tot;
 }
 
+/**
+ * Computes the mean of [first, last), then returns the R^2 (coefficient of determination)
+ */
+template <class It>
+nvbench::float64_t
+compute_r2(It first, It last, nvbench::float64_t slope, nvbench::float64_t intercept)
+{
+  return compute_r2(first, last, compute_mean(first, last), slope, intercept);
+}
+
 inline nvbench::float64_t rad2deg(nvbench::float64_t rad)
 {
   return rad * 180.0 / M_PI;