mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-05-12 09:15:47 +00:00
Add statistics::compute_percentiles, use it in summaries of measure_cold
Percentiles on empty dataset are NaN, not infinity. Add robust statistics of CPU times to summary. Fixed name for nv/cold/time/gpu/q3, corrected value reported for nv/cold/time/gpu/ir/relative. Use median and IR to compute location and noise in measure_cold. Also in stdrel_criterion, compute noise as IR / median.
This commit is contained in:
@@ -206,6 +206,7 @@ void measure_cold_base::generate_summaries()
|
||||
summ.set_int64("value", m_total_samples);
|
||||
}
|
||||
|
||||
// cpu time statistics
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/min");
|
||||
summ.set_string("name", "Min CPU Time");
|
||||
@@ -237,6 +238,7 @@ void measure_cold_base::generate_summaries()
|
||||
summ.set_string("description",
|
||||
"Mean isolated kernel execution time "
|
||||
"(measured on host CPU)");
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
summ.set_float64("value", cpu_mean);
|
||||
}
|
||||
|
||||
@@ -247,7 +249,7 @@ void measure_cold_base::generate_summaries()
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/stdev/absolute");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Standard deviation of isolated CPU times");
|
||||
summ.set_string("description", "Standard deviation of isolated kernel execution CPU times");
|
||||
summ.set_float64("value", cpu_stdev);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
@@ -257,10 +259,60 @@ void measure_cold_base::generate_summaries()
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/stdev/relative");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("description", "Relative standard deviation of isolated CPU times");
|
||||
summ.set_string("description",
|
||||
"Relative standard deviation of isolated kernel execution CPU times");
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
summ.set_float64("value", cpu_noise);
|
||||
}
|
||||
|
||||
const auto [cpu_time_first_quartile, cpu_time_median, cpu_time_third_quartile] =
|
||||
nvbench::detail::statistics::compute_percentiles(m_cpu_times.cbegin(),
|
||||
m_cpu_times.cend(),
|
||||
{25, 50, 75});
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/q1");
|
||||
summ.set_string("name", "Q1");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "First quartile of isolated kernel execution CPU times");
|
||||
summ.set_float64("value", cpu_time_first_quartile);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/median");
|
||||
summ.set_string("name", "CPU Time");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Median of isolated kernel execution CPU times");
|
||||
summ.set_float64("value", cpu_time_median);
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/q3");
|
||||
summ.set_string("name", "Q3");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Third quartile of isolated kernel execution CPU times");
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
summ.set_float64("value", cpu_time_third_quartile);
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/ir/absolute");
|
||||
summ.set_string("name", "IR");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Interquartile range of isolated kernel execution CPU times");
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
const auto cpu_time_ir = cpu_time_third_quartile - cpu_time_first_quartile;
|
||||
summ.set_float64("value", cpu_time_ir);
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/ir/relative");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("description",
|
||||
"Relative interquartile range of isolated kernel execution CPU times");
|
||||
const auto cpu_time_ir = cpu_time_third_quartile - cpu_time_first_quartile;
|
||||
const auto cpu_robust_noise = cpu_time_ir / cpu_time_median;
|
||||
summ.set_float64("value", cpu_robust_noise);
|
||||
}
|
||||
|
||||
// gpu time statistics
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/min");
|
||||
summ.set_string("name", "Min GPU Time");
|
||||
@@ -291,6 +343,7 @@ void measure_cold_base::generate_summaries()
|
||||
summ.set_string("description",
|
||||
"Mean isolated kernel execution time "
|
||||
"(measured with CUDA events)");
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
summ.set_float64("value", cuda_mean);
|
||||
}
|
||||
|
||||
@@ -301,7 +354,7 @@ void measure_cold_base::generate_summaries()
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/stdev/absolute");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Standard deviation of isolated GPU times");
|
||||
summ.set_string("description", "Standard deviation of isolated kernel execution GPU times");
|
||||
summ.set_float64("value", cuda_stdev);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
@@ -311,10 +364,59 @@ void measure_cold_base::generate_summaries()
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/stdev/relative");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("description", "Relative standard deviation of isolated GPU times");
|
||||
summ.set_string("description",
|
||||
"Relative standard deviation of isolated kernel execution GPU times");
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
summ.set_float64("value", cuda_noise);
|
||||
}
|
||||
|
||||
const auto [cuda_time_first_quartile, cuda_time_median, cuda_time_third_quartile] =
|
||||
nvbench::detail::statistics::compute_percentiles(m_cuda_times.cbegin(),
|
||||
m_cuda_times.cend(),
|
||||
{25, 50, 75});
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/q1");
|
||||
summ.set_string("name", "Q1");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "First quartile of isolated kernel execution GPU times");
|
||||
summ.set_float64("value", cuda_time_first_quartile);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/median");
|
||||
summ.set_string("name", "GPU Time");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Median of isolated kernel execution GPU times");
|
||||
summ.set_float64("value", cuda_time_median);
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/q3");
|
||||
summ.set_string("name", "Q3");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Third quartile of isolated kernel execution GPU times");
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
summ.set_float64("value", cuda_time_third_quartile);
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/ir/absolute");
|
||||
summ.set_string("name", "IR");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Interquartile range of isolated kernel execution GPU times");
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
const auto cuda_time_ir = cuda_time_third_quartile - cuda_time_first_quartile;
|
||||
summ.set_float64("value", cuda_time_ir);
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/ir/relative");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("description",
|
||||
"Relative interquartile range of isolated kernel execution GPU times");
|
||||
const auto cuda_time_ir = cuda_time_third_quartile - cuda_time_first_quartile;
|
||||
const auto cuda_robust_noise = cuda_time_ir / cuda_time_median;
|
||||
summ.set_float64("value", cuda_robust_noise);
|
||||
}
|
||||
|
||||
if (const auto items = m_state.get_element_count(); items != 0)
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/bw/item_rate");
|
||||
|
||||
@@ -173,6 +173,56 @@ void measure_cpu_only_base::generate_summaries()
|
||||
summ.set_float64("value", cpu_noise);
|
||||
}
|
||||
|
||||
const auto [cpu_first_quartile, cpu_median, cpu_third_quartile] =
|
||||
nvbench::detail::statistics::compute_percentiles(m_cpu_times.cbegin(),
|
||||
m_cpu_times.cend(),
|
||||
{25, 50, 75});
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/q1");
|
||||
summ.set_string("name", "Q1");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "First quartile of CPU times of isolated kernel executions");
|
||||
summ.set_float64("value", cpu_first_quartile);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/median");
|
||||
summ.set_string("name", "Median");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Median of CPU times of isolated kernel executions");
|
||||
summ.set_float64("value", cpu_median);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/q3");
|
||||
summ.set_string("name", "Q3");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Third quartile of CPU times of isolated kernel executions");
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
summ.set_float64("value", cpu_third_quartile);
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/ir/absolute");
|
||||
summ.set_string("name", "IR");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description",
|
||||
"Interquartile range of CPU times of isolated kernel executions");
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
const auto cpu_ir = cpu_third_quartile - cpu_first_quartile;
|
||||
summ.set_float64("value", cpu_ir);
|
||||
}
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/ir/relative");
|
||||
summ.set_string("name", "IR");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("description",
|
||||
"Relative interquartile range of CPU times of isolated kernel executions");
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
const auto cpu_ir = cpu_third_quartile - cpu_first_quartile;
|
||||
const auto cpu_robust_noise = cpu_ir / cpu_median;
|
||||
summ.set_float64("value", cpu_robust_noise);
|
||||
}
|
||||
|
||||
if (const auto items = m_state.get_element_count(); items != 0)
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/bw/item_rate");
|
||||
|
||||
@@ -31,12 +31,15 @@
|
||||
#include <nvbench/detail/transform_reduce.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846
|
||||
@@ -93,6 +96,56 @@ nvbench::float64_t compute_mean(It first, It last)
|
||||
return std::accumulate(first, last, 0.0) / static_cast<nvbench::float64_t>(num);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes exact percentile values using rank round(p / 100 * (S - 1)).
|
||||
*
|
||||
* The input range is copied before sorting, so const iterators are supported.
|
||||
* If the input has fewer than 1 sample, all percentiles are returned as infinity.
|
||||
*/
|
||||
template <typename Iter,
|
||||
std::size_t N,
|
||||
typename ValueType = typename std::iterator_traits<Iter>::value_type>
|
||||
std::array<ValueType, N> compute_percentiles(Iter first, Iter last, std::array<int, N> percentiles)
|
||||
{
|
||||
std::array<ValueType, N> result{};
|
||||
|
||||
const auto num = std::distance(first, last);
|
||||
if (num < 1)
|
||||
{
|
||||
result.fill(std::numeric_limits<ValueType>::quiet_NaN());
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<ValueType> sorted(first, last);
|
||||
std::sort(sorted.begin(), sorted.end());
|
||||
|
||||
const auto max_rank = static_cast<nvbench::float64_t>(sorted.size() - 1);
|
||||
for (std::size_t i = 0; i < N; ++i)
|
||||
{
|
||||
const auto clamped_percentile = std::clamp(percentiles[i], 0, 100);
|
||||
|
||||
const auto quantile = static_cast<nvbench::float64_t>(clamped_percentile) / 100.0;
|
||||
const auto rank = static_cast<std::size_t>(std::round(quantile * max_rank));
|
||||
|
||||
result[i] = sorted[rank];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Convenience overload that accepts the percentiles as a built-in array, which allows
 * calls with a braced list such as `compute_percentiles(first, last, {25, 50, 75})`.
 *
 * The requested percentiles are copied into a `std::array<int, N>` and the call is
 * forwarded with that array.
 */
template <typename Iter,
          std::size_t N,
          typename ValueType = typename std::iterator_traits<Iter>::value_type>
std::array<ValueType, N> compute_percentiles(Iter first, Iter last, const int (&percentiles)[N])
{
  std::array<int, N> requested{};
  for (std::size_t idx = 0; idx < N; ++idx)
  {
    requested[idx] = percentiles[idx];
  }
  return compute_percentiles(first, last, requested);
}
|
||||
|
||||
/**
|
||||
* Computes linear regression and returns the slope and intercept
|
||||
*
|
||||
|
||||
@@ -42,14 +42,15 @@ void stdrel_criterion::do_add_measurement(nvbench::float64_t measurement)
|
||||
m_cuda_times.push_back(measurement);
|
||||
|
||||
// Compute convergence statistics using CUDA timings:
|
||||
const auto mean_cuda_time = m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples);
|
||||
const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
|
||||
m_cuda_times.cend(),
|
||||
mean_cuda_time);
|
||||
const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
|
||||
if (std::isfinite(cuda_rel_stdev))
|
||||
const auto [cuda_first_quartile, cuda_median, cuda_third_quartile] =
|
||||
nvbench::detail::statistics::compute_percentiles(m_cuda_times.cbegin(),
|
||||
m_cuda_times.cend(),
|
||||
{25, 50, 75});
|
||||
const auto cuda_noise = (cuda_third_quartile - cuda_first_quartile) / cuda_median;
|
||||
|
||||
if (std::isfinite(cuda_noise))
|
||||
{
|
||||
m_noise_tracker.push_back(cuda_rel_stdev);
|
||||
m_noise_tracker.push_back(cuda_noise);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,7 +72,7 @@ bool stdrel_criterion::do_is_finished()
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if the noise (cuda rel stdev) has converged by inspecting a
|
||||
// Check if the noise has converged by inspecting a
|
||||
// trailing window of recorded noise measurements.
|
||||
// This helps identify benchmarks that are inherently noisy and would
|
||||
// never converge to the target stdev threshold. This check ensures that the
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
@@ -52,6 +53,50 @@ void test_std()
|
||||
ASSERT(std::abs(actual - expected) < 0.001);
|
||||
}
|
||||
|
||||
void test_percentiles()
|
||||
{
|
||||
{
|
||||
const std::vector<nvbench::float64_t> data{40.0, 10.0, 30.0, 20.0};
|
||||
const auto actual = statistics::compute_percentiles(data.cbegin(),
|
||||
data.cend(),
|
||||
std::array<int, 5>{0, 25, 50, 75, 100});
|
||||
const std::array<nvbench::float64_t, 5> expected{10.0, 20.0, 30.0, 30.0, 40.0};
|
||||
ASSERT(actual == expected);
|
||||
}
|
||||
|
||||
{
|
||||
const std::vector<nvbench::float64_t> data{42.0};
|
||||
const auto actual =
|
||||
statistics::compute_percentiles(data.cbegin(), data.cend(), std::array<int, 3>{25, 50, 75});
|
||||
const std::array<nvbench::float64_t, 3> expected{42.0, 42.0, 42.0};
|
||||
ASSERT(actual == expected);
|
||||
}
|
||||
|
||||
{
|
||||
const std::vector<nvbench::float64_t> data{40.0, 10.0, 30.0, 20.0};
|
||||
const auto actual = statistics::compute_percentiles(data.cbegin(), data.cend(), {25, 50, 75});
|
||||
const std::array<nvbench::float64_t, 3> expected{20.0, 30.0, 30.0};
|
||||
ASSERT(actual == expected);
|
||||
}
|
||||
|
||||
{
|
||||
const std::vector<nvbench::float64_t> data{10.0, 20.0, 30.0, 40.0};
|
||||
const auto actual =
|
||||
statistics::compute_percentiles(data.cbegin(), data.cend(), std::array<int, 2>{-25, 125});
|
||||
const std::array<nvbench::float64_t, 2> expected{10.0, 40.0};
|
||||
ASSERT(actual == expected);
|
||||
}
|
||||
|
||||
{
|
||||
const std::vector<nvbench::float64_t> data;
|
||||
const auto actual =
|
||||
statistics::compute_percentiles(data.cbegin(), data.cend(), std::array<int, 3>{25, 50, 75});
|
||||
ASSERT(!std::isfinite(actual[0]));
|
||||
ASSERT(!std::isfinite(actual[1]));
|
||||
ASSERT(!std::isfinite(actual[2]));
|
||||
}
|
||||
}
|
||||
|
||||
void test_lin_regression()
|
||||
{
|
||||
{
|
||||
@@ -126,6 +171,7 @@ int main()
|
||||
{
|
||||
test_mean();
|
||||
test_std();
|
||||
test_percentiles();
|
||||
test_lin_regression();
|
||||
test_r2();
|
||||
test_slope_conversion();
|
||||
|
||||
Reference in New Issue
Block a user