mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Merge pull request #204 from alliepiper/summaries
Add min/max timings, new "summaries" example.
This commit is contained in:
@@ -73,6 +73,7 @@ various NVBench features and usecases:
|
||||
- [Reporting item/sec and byte/sec throughput statistics](examples/throughput.cu)
|
||||
- [Skipping benchmark configurations](examples/skip.cu)
|
||||
- [Benchmarking on a specific stream](examples/stream.cu)
|
||||
- [Adding / hiding columns (summaries) in markdown output](examples/summaries.cu)
|
||||
- [Benchmarks that sync CUDA devices: `nvbench::exec_tag::sync`](examples/exec_tag_sync.cu)
|
||||
- [Manual timing: `nvbench::exec_tag::timer`](examples/exec_tag_timer.cu)
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ set(example_srcs
|
||||
exec_tag_timer.cu
|
||||
skip.cu
|
||||
stream.cu
|
||||
summaries.cu
|
||||
throughput.cu
|
||||
)
|
||||
|
||||
|
||||
70
examples/summaries.cu
Normal file
70
examples/summaries.cu
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright 2025 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/nvbench.cuh>

// Grab some testing kernels from NVBench:
#include <nvbench/test_kernels.cuh>

// std::cout is used when PRINT_DEFAULT_SUMMARY_TAGS is defined:
#include <iostream>
|
||||
|
||||
// #define PRINT_DEFAULT_SUMMARY_TAGS
|
||||
|
||||
// Example benchmark showing how to add a custom summary column and how to
// show/hide the default summary columns in the markdown output.
//
// Reads the int64 axes "ms" and "us" from `state`, combines them into a single
// duration in seconds, records that duration as a user-defined summary, runs a
// sleep kernel for that long, and finally toggles the "hide" key on several of
// the default cold-measurement summaries.
void summary_example(nvbench::state &state)
{
  // Fetch parameters and compute duration in seconds:
  const auto ms       = static_cast<nvbench::float64_t>(state.get_int64("ms"));
  const auto us       = static_cast<nvbench::float64_t>(state.get_int64("us"));
  const auto duration = ms * 1e-3 + us * 1e-6;

  // Add a new column to the summary table with the derived duration used by the benchmark.
  // See the documentation in nvbench/summary.cuh for more details.
  {
    nvbench::summary &summary = state.add_summary("duration");
    summary.set_string("name", "Duration (s)");
    summary.set_string("description", "The duration of the kernel execution.");
    summary.set_string("hint", "duration");
    summary.set_float64("value", duration);
  }

  // Run the measurements:
  state.exec([duration](nvbench::launch &launch) {
    nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(duration);
  });

#ifdef PRINT_DEFAULT_SUMMARY_TAGS
  // The default summary tags can be found by inspecting the state after calling
  // state.exec.
  // They can also be found by looking at the json output (--json <filename>)
  for (const auto &summary : state.get_summaries())
  {
    // '\n' rather than std::endl: there is no need to flush after every tag.
    std::cout << summary.get_tag() << '\n';
  }
#endif

  // Default summary columns can be shown/hidden in the markdown output tables by adding/removing
  // the "hide" key. Modify this benchmark to show the minimum and maximum times, but hide the
  // means.
  // NOTE(review): these lookups presume state.exec above created the nv/cold/* summaries --
  // confirm how get_summary behaves for a missing tag (e.g. when a measurement is skipped).
  state.get_summary("nv/cold/time/gpu/min").remove_value("hide");
  state.get_summary("nv/cold/time/gpu/max").remove_value("hide");
  state.get_summary("nv/cold/time/gpu/mean").set_string("hide", "");
  state.get_summary("nv/cold/time/cpu/min").remove_value("hide");
  state.get_summary("nv/cold/time/cpu/max").remove_value("hide");
  state.get_summary("nv/cold/time/cpu/mean").set_string("hide", "");
}
|
||||
// Register summary_example as a benchmark with two int64 axes. The function reads
// them back through state.get_int64("ms") / state.get_int64("us") and adds them
// together (as milliseconds and microseconds) to form the sleep duration.
// NOTE(review): presumably nvbench::range(first, last, stride) includes `last`,
// giving 10/30/50 and 100/300/500 -- confirm against the nvbench::range docs.
NVBENCH_BENCH(summary_example)
|
||||
.add_int64_axis("ms", nvbench::range(10, 50, 20))
|
||||
.add_int64_axis("us", nvbench::range(100, 500, 200));
|
||||
@@ -25,6 +25,9 @@
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
namespace nvbench::detail
|
||||
@@ -64,13 +67,17 @@ void measure_cold_base::check()
|
||||
|
||||
void measure_cold_base::initialize()
|
||||
{
|
||||
m_total_cuda_time = 0.;
|
||||
m_total_cpu_time = 0.;
|
||||
m_cpu_noise = 0.;
|
||||
m_total_samples = 0;
|
||||
m_min_cuda_time = std::numeric_limits<nvbench::float64_t>::max();
|
||||
m_max_cuda_time = std::numeric_limits<nvbench::float64_t>::lowest();
|
||||
m_total_cuda_time = 0.;
|
||||
m_min_cpu_time = std::numeric_limits<nvbench::float64_t>::max();
|
||||
m_max_cpu_time = std::numeric_limits<nvbench::float64_t>::lowest();
|
||||
m_total_cpu_time = 0.;
|
||||
m_total_samples = 0;
|
||||
m_max_time_exceeded = false;
|
||||
|
||||
m_cuda_times.clear();
|
||||
m_cpu_times.clear();
|
||||
m_max_time_exceeded = false;
|
||||
|
||||
m_stopping_criterion.initialize(m_criterion_params);
|
||||
}
|
||||
@@ -82,10 +89,17 @@ void measure_cold_base::record_measurements()
|
||||
// Update and record timers and counters:
|
||||
const auto cur_cuda_time = m_cuda_timer.get_duration();
|
||||
const auto cur_cpu_time = m_cpu_timer.get_duration();
|
||||
m_cuda_times.push_back(cur_cuda_time);
|
||||
m_cpu_times.push_back(cur_cpu_time);
|
||||
|
||||
m_min_cuda_time = std::min(m_min_cuda_time, cur_cuda_time);
|
||||
m_max_cuda_time = std::max(m_max_cuda_time, cur_cuda_time);
|
||||
m_total_cuda_time += cur_cuda_time;
|
||||
m_cuda_times.push_back(cur_cuda_time);
|
||||
|
||||
m_min_cpu_time = std::min(m_min_cpu_time, cur_cpu_time);
|
||||
m_max_cpu_time = std::max(m_max_cpu_time, cur_cpu_time);
|
||||
m_total_cpu_time += cur_cpu_time;
|
||||
m_cpu_times.push_back(cur_cpu_time);
|
||||
|
||||
++m_total_samples;
|
||||
|
||||
m_stopping_criterion.add_measurement(cur_cuda_time);
|
||||
@@ -118,21 +132,10 @@ bool measure_cold_base::is_finished()
|
||||
return false;
|
||||
}
|
||||
|
||||
void measure_cold_base::run_trials_epilogue()
|
||||
{
|
||||
// Only need to compute this at the end, not per iteration.
|
||||
const auto cpu_mean = m_total_cpu_time / static_cast<nvbench::float64_t>(m_total_samples);
|
||||
const auto cpu_stdev = nvbench::detail::statistics::standard_deviation(m_cpu_times.cbegin(),
|
||||
m_cpu_times.cend(),
|
||||
cpu_mean);
|
||||
m_cpu_noise = cpu_stdev / cpu_mean;
|
||||
|
||||
m_walltime_timer.stop();
|
||||
}
|
||||
void measure_cold_base::run_trials_epilogue() { m_walltime_timer.stop(); }
|
||||
|
||||
void measure_cold_base::generate_summaries()
|
||||
{
|
||||
const auto d_samples = static_cast<double>(m_total_samples);
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/sample_size");
|
||||
summ.set_string("name", "Samples");
|
||||
@@ -141,7 +144,30 @@ void measure_cold_base::generate_summaries()
|
||||
summ.set_int64("value", m_total_samples);
|
||||
}
|
||||
|
||||
const auto avg_cpu_time = m_total_cpu_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/min");
|
||||
summ.set_string("name", "Min CPU Time");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description",
|
||||
"Fastest isolated kernel execution time "
|
||||
"(measured on host CPU)");
|
||||
summ.set_float64("value", m_min_cpu_time);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/max");
|
||||
summ.set_string("name", "Max CPU Time");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description",
|
||||
"Slowest isolated kernel execution time "
|
||||
"(measured on host CPU)");
|
||||
summ.set_float64("value", m_max_cpu_time);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
|
||||
const auto d_samples = static_cast<double>(m_total_samples);
|
||||
const auto cpu_mean = m_total_cpu_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/mean");
|
||||
summ.set_string("name", "CPU Time");
|
||||
@@ -149,18 +175,53 @@ void measure_cold_base::generate_summaries()
|
||||
summ.set_string("description",
|
||||
"Mean isolated kernel execution time "
|
||||
"(measured on host CPU)");
|
||||
summ.set_float64("value", avg_cpu_time);
|
||||
summ.set_float64("value", cpu_mean);
|
||||
}
|
||||
|
||||
const auto cpu_stdev = nvbench::detail::statistics::standard_deviation(m_cpu_times.cbegin(),
|
||||
m_cpu_times.cend(),
|
||||
cpu_mean);
|
||||
// Absolute standard deviation of the per-sample CPU times. The stored value is
// `cpu_stdev` itself (same unit as the sampled times), not a ratio, so it is
// labelled as a duration and named distinctly from the relative "Noise"
// summary emitted right after it.
{
  auto &summ = m_state.add_summary("nv/cold/time/cpu/stdev/absolute");
  summ.set_string("name", "CPU StdDev");
  summ.set_string("hint", "duration");
  summ.set_string("description", "Standard deviation of isolated CPU times");
  summ.set_float64("value", cpu_stdev);
  summ.set_string("hide", "Hidden by default.");
}
|
||||
|
||||
const auto cpu_noise = cpu_stdev / cpu_mean;
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/stdev/relative");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("description", "Relative standard deviation of isolated CPU times");
|
||||
summ.set_float64("value", m_cpu_noise);
|
||||
summ.set_float64("value", cpu_noise);
|
||||
}
|
||||
|
||||
const auto avg_cuda_time = m_total_cuda_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/min");
|
||||
summ.set_string("name", "Min GPU Time");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description",
|
||||
"Fastest isolated kernel execution time "
|
||||
"(measured with CUDA events)");
|
||||
summ.set_float64("value", m_min_cuda_time);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/max");
|
||||
summ.set_string("name", "Max GPU Time");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description",
|
||||
"Slowest isolated kernel execution time "
|
||||
"(measured with CUDA events)");
|
||||
summ.set_float64("value", m_max_cuda_time);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
|
||||
const auto cuda_mean = m_total_cuda_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/mean");
|
||||
summ.set_string("name", "GPU Time");
|
||||
@@ -168,24 +229,28 @@ void measure_cold_base::generate_summaries()
|
||||
summ.set_string("description",
|
||||
"Mean isolated kernel execution time "
|
||||
"(measured with CUDA events)");
|
||||
summ.set_float64("value", avg_cuda_time);
|
||||
summ.set_float64("value", cuda_mean);
|
||||
}
|
||||
|
||||
const auto mean_cuda_time = m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples);
|
||||
const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
|
||||
const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
|
||||
m_cuda_times.cend(),
|
||||
mean_cuda_time);
|
||||
const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
|
||||
const auto noise = cuda_rel_stdev;
|
||||
const auto max_noise = m_criterion_params.get_float64("max-noise");
|
||||
const auto min_time = m_criterion_params.get_float64("min-time");
|
||||
cuda_mean);
|
||||
// Absolute standard deviation of the per-sample GPU times. The stored value is
// `cuda_stdev` itself (same unit as the sampled times), not a ratio, so it is
// labelled as a duration and named distinctly from the relative "Noise"
// summary emitted right after it.
{
  auto &summ = m_state.add_summary("nv/cold/time/gpu/stdev/absolute");
  summ.set_string("name", "GPU StdDev");
  summ.set_string("hint", "duration");
  summ.set_string("description", "Standard deviation of isolated GPU times");
  summ.set_float64("value", cuda_stdev);
  summ.set_string("hide", "Hidden by default.");
}
|
||||
|
||||
const auto cuda_noise = cuda_stdev / cuda_mean;
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/stdev/relative");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("description", "Relative standard deviation of isolated GPU times");
|
||||
summ.set_float64("value", noise);
|
||||
summ.set_float64("value", cuda_noise);
|
||||
}
|
||||
|
||||
if (const auto items = m_state.get_element_count(); items != 0)
|
||||
@@ -194,12 +259,12 @@ void measure_cold_base::generate_summaries()
|
||||
summ.set_string("name", "Elem/s");
|
||||
summ.set_string("hint", "item_rate");
|
||||
summ.set_string("description", "Number of input elements processed per second");
|
||||
summ.set_float64("value", static_cast<double>(items) / avg_cuda_time);
|
||||
summ.set_float64("value", static_cast<double>(items) / cuda_mean);
|
||||
}
|
||||
|
||||
if (const auto bytes = m_state.get_global_memory_rw_bytes(); bytes != 0)
|
||||
{
|
||||
const auto avg_used_gmem_bw = static_cast<double>(bytes) / avg_cuda_time;
|
||||
const auto avg_used_gmem_bw = static_cast<double>(bytes) / cuda_mean;
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/bw/global/bytes_per_second");
|
||||
summ.set_string("name", "GlobalMem BW");
|
||||
@@ -240,16 +305,18 @@ void measure_cold_base::generate_summaries()
|
||||
|
||||
if (m_max_time_exceeded)
|
||||
{
|
||||
const auto timeout = m_walltime_timer.get_duration();
|
||||
const auto timeout = m_walltime_timer.get_duration();
|
||||
const auto max_noise = m_criterion_params.get_float64("max-noise");
|
||||
const auto min_time = m_criterion_params.get_float64("min-time");
|
||||
|
||||
if (noise > max_noise)
|
||||
if (cuda_noise > max_noise)
|
||||
{
|
||||
printer.log(nvbench::log_level::warn,
|
||||
fmt::format("Current measurement timed out ({:0.2f}s) "
|
||||
"while over noise threshold ({:0.2f}% > "
|
||||
"{:0.2f}%)",
|
||||
timeout,
|
||||
noise * 100,
|
||||
cuda_noise * 100,
|
||||
max_noise * 100));
|
||||
}
|
||||
if (m_total_samples < m_min_samples)
|
||||
@@ -277,8 +344,8 @@ void measure_cold_base::generate_summaries()
|
||||
printer.log(nvbench::log_level::pass,
|
||||
fmt::format("Cold: {:0.6f}ms GPU, {:0.6f}ms CPU, {:0.2f}s "
|
||||
"total GPU, {:0.2f}s total wall, {}x ",
|
||||
avg_cuda_time * 1e3,
|
||||
avg_cpu_time * 1e3,
|
||||
cuda_mean * 1e3,
|
||||
cpu_mean * 1e3,
|
||||
m_total_cuda_time,
|
||||
m_walltime_timer.get_duration(),
|
||||
m_total_samples));
|
||||
|
||||
@@ -98,9 +98,14 @@ protected:
|
||||
nvbench::float64_t m_timeout{};
|
||||
|
||||
nvbench::int64_t m_total_samples{};
|
||||
|
||||
nvbench::float64_t m_min_cuda_time{};
|
||||
nvbench::float64_t m_max_cuda_time{};
|
||||
nvbench::float64_t m_total_cuda_time{};
|
||||
|
||||
nvbench::float64_t m_min_cpu_time{};
|
||||
nvbench::float64_t m_max_cpu_time{};
|
||||
nvbench::float64_t m_total_cpu_time{};
|
||||
nvbench::float64_t m_cpu_noise{}; // rel stdev
|
||||
|
||||
std::vector<nvbench::float64_t> m_cuda_times;
|
||||
std::vector<nvbench::float64_t> m_cpu_times;
|
||||
|
||||
@@ -76,8 +76,10 @@ protected:
|
||||
nvbench::float64_t m_timeout{};
|
||||
|
||||
nvbench::int64_t m_total_samples{};
|
||||
|
||||
nvbench::float64_t m_min_cpu_time{};
|
||||
nvbench::float64_t m_max_cpu_time{};
|
||||
nvbench::float64_t m_total_cpu_time{};
|
||||
nvbench::float64_t m_cpu_noise{}; // rel stdev
|
||||
|
||||
std::vector<nvbench::float64_t> m_cpu_times;
|
||||
|
||||
|
||||
@@ -24,6 +24,9 @@
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
namespace nvbench::detail
|
||||
@@ -52,12 +55,15 @@ void measure_cpu_only_base::check()
|
||||
|
||||
void measure_cpu_only_base::initialize()
|
||||
{
|
||||
m_total_cpu_time = 0.;
|
||||
m_cpu_noise = 0.;
|
||||
m_total_samples = 0;
|
||||
m_cpu_times.clear();
|
||||
|
||||
m_min_cpu_time = std::numeric_limits<nvbench::float64_t>::max();
|
||||
m_max_cpu_time = std::numeric_limits<nvbench::float64_t>::lowest();
|
||||
m_total_cpu_time = 0.;
|
||||
m_total_samples = 0;
|
||||
m_max_time_exceeded = false;
|
||||
|
||||
m_cpu_times.clear();
|
||||
|
||||
m_stopping_criterion.initialize(m_criterion_params);
|
||||
}
|
||||
|
||||
@@ -67,8 +73,12 @@ void measure_cpu_only_base::record_measurements()
|
||||
{
|
||||
// Update and record timers and counters:
|
||||
const auto cur_cpu_time = m_cpu_timer.get_duration();
|
||||
m_cpu_times.push_back(cur_cpu_time);
|
||||
|
||||
m_min_cpu_time = std::min(m_min_cpu_time, cur_cpu_time);
|
||||
m_max_cpu_time = std::max(m_max_cpu_time, cur_cpu_time);
|
||||
m_total_cpu_time += cur_cpu_time;
|
||||
m_cpu_times.push_back(cur_cpu_time);
|
||||
|
||||
++m_total_samples;
|
||||
|
||||
m_stopping_criterion.add_measurement(cur_cpu_time);
|
||||
@@ -101,17 +111,7 @@ bool measure_cpu_only_base::is_finished()
|
||||
return false;
|
||||
}
|
||||
|
||||
void measure_cpu_only_base::run_trials_epilogue()
|
||||
{
|
||||
// Only need to compute this at the end, not per iteration.
|
||||
const auto cpu_mean = m_total_cpu_time / static_cast<nvbench::float64_t>(m_total_samples);
|
||||
const auto cpu_stdev = nvbench::detail::statistics::standard_deviation(m_cpu_times.cbegin(),
|
||||
m_cpu_times.cend(),
|
||||
cpu_mean);
|
||||
m_cpu_noise = cpu_stdev / cpu_mean;
|
||||
|
||||
m_walltime_timer.stop();
|
||||
}
|
||||
void measure_cpu_only_base::run_trials_epilogue() { m_walltime_timer.stop(); }
|
||||
|
||||
void measure_cpu_only_base::generate_summaries()
|
||||
{
|
||||
@@ -123,24 +123,53 @@ void measure_cpu_only_base::generate_summaries()
|
||||
summ.set_int64("value", m_total_samples);
|
||||
}
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/min");
|
||||
summ.set_string("name", "Min CPU Time");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Fastest CPU time of isolated kernel executions");
|
||||
summ.set_float64("value", m_min_cpu_time);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/max");
|
||||
summ.set_string("name", "Max CPU Time");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Slowest CPU time of isolated kernel executions");
|
||||
summ.set_float64("value", m_max_cpu_time);
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
|
||||
const auto d_samples = static_cast<nvbench::float64_t>(m_total_samples);
|
||||
const auto avg_cpu_time = m_total_cpu_time / d_samples;
|
||||
const auto cpu_mean = m_total_cpu_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/mean");
|
||||
summ.set_string("name", "CPU Time");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description",
|
||||
"Mean isolated kernel execution time "
|
||||
"(measured on host CPU)");
|
||||
summ.set_float64("value", avg_cpu_time);
|
||||
summ.set_string("description", "Mean CPU time of isolated kernel executions");
|
||||
summ.set_float64("value", cpu_mean);
|
||||
}
|
||||
|
||||
const auto cpu_stdev = nvbench::detail::statistics::standard_deviation(m_cpu_times.cbegin(),
|
||||
m_cpu_times.cend(),
|
||||
cpu_mean);
|
||||
// Absolute standard deviation of the per-sample CPU times. The stored value is
// `cpu_stdev` itself (same unit as the sampled times), not a ratio, so it is
// labelled as a duration and named distinctly from the relative "Noise"
// summary emitted right after it.
{
  auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/stdev/absolute");
  summ.set_string("name", "CPU StdDev");
  summ.set_string("hint", "duration");
  summ.set_string("description", "Standard deviation of isolated CPU times");
  summ.set_float64("value", cpu_stdev);
  summ.set_string("hide", "Hidden by default.");
}
|
||||
|
||||
const auto cpu_noise = cpu_stdev / cpu_mean;
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/stdev/relative");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("description", "Relative standard deviation of isolated CPU times");
|
||||
summ.set_float64("value", m_cpu_noise);
|
||||
summ.set_float64("value", cpu_noise);
|
||||
}
|
||||
|
||||
if (const auto items = m_state.get_element_count(); items != 0)
|
||||
@@ -149,12 +178,12 @@ void measure_cpu_only_base::generate_summaries()
|
||||
summ.set_string("name", "Elem/s");
|
||||
summ.set_string("hint", "item_rate");
|
||||
summ.set_string("description", "Number of input elements processed per second");
|
||||
summ.set_float64("value", static_cast<double>(items) / avg_cpu_time);
|
||||
summ.set_float64("value", static_cast<double>(items) / cpu_mean);
|
||||
}
|
||||
|
||||
if (const auto bytes = m_state.get_global_memory_rw_bytes(); bytes != 0)
|
||||
{
|
||||
const auto avg_used_gmem_bw = static_cast<double>(bytes) / avg_cpu_time;
|
||||
const auto avg_used_gmem_bw = static_cast<double>(bytes) / cpu_mean;
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/bw/global/bytes_per_second");
|
||||
summ.set_string("name", "GlobalMem BW");
|
||||
@@ -185,14 +214,14 @@ void measure_cpu_only_base::generate_summaries()
|
||||
const auto max_noise = m_criterion_params.get_float64("max-noise");
|
||||
const auto min_time = m_criterion_params.get_float64("min-time");
|
||||
|
||||
if (m_cpu_noise > max_noise)
|
||||
if (cpu_noise > max_noise)
|
||||
{
|
||||
printer.log(nvbench::log_level::warn,
|
||||
fmt::format("Current measurement timed out ({:0.2f}s) "
|
||||
"while over noise threshold ({:0.2f}% > "
|
||||
"{:0.2f}%)",
|
||||
timeout,
|
||||
m_cpu_noise * 100,
|
||||
cpu_noise * 100,
|
||||
max_noise * 100));
|
||||
}
|
||||
if (m_total_samples < m_min_samples)
|
||||
@@ -220,7 +249,7 @@ void measure_cpu_only_base::generate_summaries()
|
||||
printer.log(nvbench::log_level::pass,
|
||||
fmt::format("CpuOnly: {:0.6f}ms mean CPU, {:0.2f}s total CPU, "
|
||||
"{:0.2f}s total wall, {}x ",
|
||||
avg_cpu_time * 1e3,
|
||||
cpu_mean * 1e3,
|
||||
m_total_cpu_time,
|
||||
m_walltime_timer.get_duration(),
|
||||
m_total_samples));
|
||||
|
||||
Reference in New Issue
Block a user