mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-05-19 04:18:52 +00:00
Merge remote-tracking branch 'upstream/main' into add-bench-result
This commit is contained in:
137
.coderabbit.yaml
Normal file
137
.coderabbit.yaml
Normal file
@@ -0,0 +1,137 @@
|
||||
# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
language: en-US
|
||||
tone_instructions: |
|
||||
Be direct, technical, brief. No praise, emojis, headings, or collapsible menus.
|
||||
Start each comment with one prefix:
|
||||
- suggestion: optional improvement;
|
||||
- important: must-fix/high-impact risk;
|
||||
- critical: blocking correctness/security/data-loss.
|
||||
|
||||
reviews:
|
||||
profile: chill
|
||||
|
||||
high_level_summary: true
|
||||
high_level_summary_in_walkthrough: true
|
||||
poem: false
|
||||
in_progress_fortune: false
|
||||
sequence_diagrams: false
|
||||
estimate_code_review_effort: false
|
||||
collapse_walkthrough: true
|
||||
|
||||
# Reduce noisy status/details sections.
|
||||
request_changes_workflow: false
|
||||
review_status: false
|
||||
review_details: false
|
||||
enable_prompt_for_ai_agents: false
|
||||
|
||||
auto_review:
|
||||
enabled: false
|
||||
drafts: false
|
||||
base_branches:
|
||||
- "^main$"
|
||||
- "^branch/[0-9]+\\.[0-9]+\\.x$"
|
||||
ignore_usernames: ["copy-pr-bot", "dependabot[bot]", "github-actions[bot]", "nv-automation-bot"]
|
||||
|
||||
tools:
|
||||
gitleaks:
|
||||
enabled: true
|
||||
markdownlint:
|
||||
enabled: true
|
||||
shellcheck:
|
||||
enabled: true
|
||||
|
||||
# Keep Autofix available, but disable the other finishing touch actions.
|
||||
finishing_touches:
|
||||
docstrings:
|
||||
enabled: false
|
||||
unit_tests:
|
||||
enabled: false
|
||||
simplify:
|
||||
enabled: false
|
||||
|
||||
pre_merge_checks:
|
||||
docstrings:
|
||||
mode: "off"
|
||||
title:
|
||||
mode: "off"
|
||||
description:
|
||||
mode: "off"
|
||||
issue_assessment:
|
||||
mode: "off"
|
||||
custom_checks: []
|
||||
|
||||
path_instructions:
|
||||
- path: "nvbench/**/*"
|
||||
instructions: |
|
||||
Focus on benchmark correctness, CUDA stream/event ordering, synchronization behavior, error handling,
|
||||
resource ownership, exception safety, public API compatibility, measurement semantics, statistical
|
||||
summaries, and test coverage. Prefer comments that catch correctness, API, compile-time, runtime, or
|
||||
measurement-regression risks.
|
||||
|
||||
- path: "python/**/*"
|
||||
instructions: |
|
||||
Focus on Python API stability, pybind11/C++ exception boundaries, GIL behavior, CUDA interoperability,
|
||||
object lifetime, package metadata, type stubs, JSON/result parsing compatibility, and tests. Avoid
|
||||
style-only comments already covered by Ruff, clang-format, or pre-commit.
|
||||
|
||||
- path: "testing/**/*"
|
||||
instructions: |
|
||||
Focus on whether tests cover observable behavior, remain deterministic, handle GPU availability and CUDA
|
||||
version differences correctly, avoid excessive runtime, and exercise install/export/package boundaries
|
||||
where relevant.
|
||||
|
||||
- path: "examples/**/*"
|
||||
instructions: |
|
||||
Check that examples are minimal, buildable, technically correct, use NVBench APIs idiomatically, avoid
|
||||
excessive benchmark runtime, and demonstrate behavior that is useful to users.
|
||||
|
||||
- path: "docs/**/*"
|
||||
instructions: |
|
||||
For documentation changes, focus on technical accuracy, buildable examples, CLI/API consistency,
|
||||
version compatibility, and whether behavior changes have matching documentation updates.
|
||||
|
||||
- path: "ci/**/*"
|
||||
instructions: |
|
||||
For CI and build scripts, focus on matrix correctness, targeted build/test behavior, cache/artifact
|
||||
handling, environment setup, GPU availability assumptions, clear failures, and avoiding unnecessary
|
||||
expensive jobs.
|
||||
|
||||
- path: ".github/**/*"
|
||||
instructions: |
|
||||
For GitHub workflows and repository automation, focus on permissions, event triggers, matrix generation,
|
||||
status/check behavior, security boundaries, and avoiding unnecessary CI fanout.
|
||||
|
||||
- path: "cmake/**/*"
|
||||
instructions: |
|
||||
Focus on package exports, install-tree and build-tree compatibility, target usage requirements, static
|
||||
and shared library behavior, CUDA architecture handling, and compatibility across supported CMake/CUDA
|
||||
versions.
|
||||
|
||||
- path: "**/CMakeLists.txt"
|
||||
instructions: |
|
||||
Focus on target dependencies, exported usage requirements, option behavior, install rules, tests,
|
||||
examples, Python package integration, and compatibility across supported CMake/CUDA versions.
|
||||
|
||||
- path: "CMakePresets.json"
|
||||
instructions: |
|
||||
Focus on preset inheritance, CI parity, cache variable correctness, CUDA compiler/toolchain assumptions,
|
||||
and whether presets remain useful for local and automated builds.
|
||||
|
||||
knowledge_base:
|
||||
opt_out: false
|
||||
code_guidelines:
|
||||
filePatterns:
|
||||
- ".clang-format"
|
||||
- ".pre-commit-config.yaml"
|
||||
- "README.md"
|
||||
- "CMakeLists.txt"
|
||||
- "CMakePresets.json"
|
||||
- "pyproject.toml"
|
||||
- "python/README.md"
|
||||
- "python/pyproject.toml"
|
||||
- "docs/benchmarks.md"
|
||||
- "docs/cli_help.md"
|
||||
- "docs/cli_help_axis.md"
|
||||
@@ -93,6 +93,21 @@
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--cold-warmup-runs <count>`
|
||||
* Execute up to `<count>` warmup runs before collecting cold measurement samples.
|
||||
* The minimum is 1 warmup run; smaller values are clamped to 1.
|
||||
* Default is 1 warmup run.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--cold-max-warmup-walltime <seconds>`
|
||||
* Stop cold warmup after the total warmup walltime exceeds `<seconds>`.
|
||||
* The limit is checked after each warmup run, so actual warmup time may exceed
|
||||
this value by one warmup run.
|
||||
* Default is -1 seconds (disabled). Zero or any negative value disables the limit.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--throttle-threshold <value>`
|
||||
* Set the GPU throttle threshold as percentage of the device's default clock rate.
|
||||
* Default is 75.
|
||||
|
||||
@@ -166,6 +166,28 @@ struct benchmark_base
|
||||
}
|
||||
/// @}
|
||||
|
||||
/// Execute up to this many warmup runs (minimum 1) before collecting cold measurement samples. @{
|
||||
[[nodiscard]] nvbench::int64_t get_cold_warmup_runs() const
{
  // Current cold warmup run count.
  return m_cold_warmup_runs;
}
|
||||
benchmark_base &set_cold_warmup_runs(nvbench::int64_t cold_warmup_runs)
{
  // Values below 1 (zero or negative) are stored as 1.
  constexpr nvbench::int64_t min_warmup_runs{1};
  m_cold_warmup_runs = cold_warmup_runs < min_warmup_runs ? min_warmup_runs : cold_warmup_runs;
  return *this;
}
|
||||
/// @}
|
||||
|
||||
/// Stop cold warmups after this many seconds of walltime. Values <= 0 disable the limit. @{
|
||||
[[nodiscard]] nvbench::float64_t get_cold_max_warmup_walltime() const
{
  // Walltime limit, in seconds, for cold warmup runs.
  return this->m_cold_max_warmup_walltime;
}
|
||||
benchmark_base &set_cold_max_warmup_walltime(nvbench::float64_t cold_max_warmup_walltime)
{
  // The value is stored as given; no clamping or validation happens here.
  this->m_cold_max_warmup_walltime = cold_max_warmup_walltime;
  return *this;
}
|
||||
/// @}
|
||||
|
||||
/// If true, the benchmark measurements only record CPU time and assume no GPU work is performed.
|
||||
/// @{
|
||||
[[nodiscard]] bool get_is_cpu_only() const { return m_is_cpu_only; }
|
||||
@@ -321,7 +343,9 @@ protected:
|
||||
bool m_skip_batched{false};
|
||||
|
||||
nvbench::int64_t m_min_samples{10};
|
||||
nvbench::int64_t m_cold_warmup_runs{1};
|
||||
|
||||
nvbench::float64_t m_cold_max_warmup_walltime{-1.};
|
||||
nvbench::float64_t m_skip_time{-1.};
|
||||
nvbench::float64_t m_timeout{15.};
|
||||
|
||||
|
||||
@@ -43,7 +43,9 @@ std::unique_ptr<benchmark_base> benchmark_base::clone() const
|
||||
result->m_run_once = m_run_once;
|
||||
result->m_disable_blocking_kernel = m_disable_blocking_kernel;
|
||||
|
||||
result->m_min_samples = m_min_samples;
|
||||
result->m_min_samples = m_min_samples;
|
||||
result->m_cold_warmup_runs = m_cold_warmup_runs;
|
||||
result->m_cold_max_warmup_walltime = m_cold_max_warmup_walltime;
|
||||
|
||||
result->m_skip_time = m_skip_time;
|
||||
result->m_timeout = m_timeout;
|
||||
|
||||
@@ -46,6 +46,8 @@ measure_cold_base::measure_cold_base(state &exec_state)
|
||||
, m_run_once{exec_state.get_run_once()}
|
||||
, m_check_throttling(!exec_state.get_run_once())
|
||||
, m_min_samples{exec_state.get_min_samples()}
|
||||
, m_cold_warmup_runs{exec_state.get_cold_warmup_runs()}
|
||||
, m_cold_max_warmup_walltime{exec_state.get_cold_max_warmup_walltime()}
|
||||
, m_skip_time{exec_state.get_skip_time()}
|
||||
, m_timeout{exec_state.get_timeout()}
|
||||
, m_throttle_threshold(exec_state.get_throttle_threshold())
|
||||
|
||||
@@ -110,7 +110,9 @@ protected:
|
||||
bool m_check_throttling{true};
|
||||
|
||||
nvbench::int64_t m_min_samples{};
|
||||
nvbench::int64_t m_cold_warmup_runs{1};
|
||||
|
||||
nvbench::float64_t m_cold_max_warmup_walltime{};
|
||||
nvbench::float64_t m_skip_time{};
|
||||
nvbench::float64_t m_timeout{};
|
||||
|
||||
@@ -239,8 +241,8 @@ struct measure_cold : public measure_cold_base
|
||||
}
|
||||
|
||||
private:
|
||||
// Run the kernel once, measuring the GPU time. If under skip_time, skip the
|
||||
// measurement.
|
||||
// Run the kernel m_cold_warmup_runs times, measuring the GPU time of the last run.
|
||||
// If under skip_time, skip the measurement.
|
||||
void run_warmup()
|
||||
{
|
||||
if (m_run_once)
|
||||
@@ -248,12 +250,29 @@ private:
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure blocking kernel is loaded during the warmup
|
||||
// Ref: https://github.com/NVIDIA/nvbench/issues/339
|
||||
this->block_stream();
|
||||
this->unblock_stream();
|
||||
|
||||
// disable use of blocking kernel for warm-up run
|
||||
// see https://github.com/NVIDIA/nvbench/issues/240
|
||||
constexpr bool disable_blocking_kernel = true;
|
||||
kernel_launch_timer timer(*this, disable_blocking_kernel);
|
||||
nvbench::cpu_timer warmup_walltime_timer;
|
||||
|
||||
this->launch_kernel(timer);
|
||||
warmup_walltime_timer.start();
|
||||
for (nvbench::int64_t warmup_run = 0; warmup_run < m_cold_warmup_runs; ++warmup_run)
|
||||
{
|
||||
this->launch_kernel(timer);
|
||||
warmup_walltime_timer.stop();
|
||||
|
||||
if (m_cold_max_warmup_walltime > 0. &&
|
||||
warmup_walltime_timer.get_duration() > m_cold_max_warmup_walltime)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
this->check_skip_time(m_cuda_timer.get_duration());
|
||||
}
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ namespace fs = std::filesystem;
|
||||
#include <experimental/filesystem>
|
||||
namespace fs = std::experimental::filesystem;
|
||||
#else
|
||||
static_assert(false, "No <filesystem> or <experimental/filesystem> found.");
|
||||
#error "No <filesystem> or <experimental/filesystem> found."
|
||||
#endif
|
||||
|
||||
#if NVBENCH_CPP_DIALECT >= 2020
|
||||
@@ -429,9 +429,11 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
bench["name"] = bench_ptr->get_name();
|
||||
bench["index"] = bench_index;
|
||||
|
||||
bench["min_samples"] = bench_ptr->get_min_samples();
|
||||
bench["skip_time"] = bench_ptr->get_skip_time();
|
||||
bench["timeout"] = bench_ptr->get_timeout();
|
||||
bench["min_samples"] = bench_ptr->get_min_samples();
|
||||
bench["cold_warmup_runs"] = bench_ptr->get_cold_warmup_runs();
|
||||
bench["cold_max_warmup_walltime"] = bench_ptr->get_cold_max_warmup_walltime();
|
||||
bench["skip_time"] = bench_ptr->get_skip_time();
|
||||
bench["timeout"] = bench_ptr->get_timeout();
|
||||
|
||||
auto &devices = bench["devices"];
|
||||
for (const auto &dev_info : bench_ptr->get_devices())
|
||||
@@ -486,9 +488,11 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
|
||||
st["name"] = exec_state.get_axis_values_as_string();
|
||||
|
||||
st["min_samples"] = exec_state.get_min_samples();
|
||||
st["skip_time"] = exec_state.get_skip_time();
|
||||
st["timeout"] = exec_state.get_timeout();
|
||||
st["min_samples"] = exec_state.get_min_samples();
|
||||
st["cold_warmup_runs"] = exec_state.get_cold_warmup_runs();
|
||||
st["cold_max_warmup_walltime"] = exec_state.get_cold_max_warmup_walltime();
|
||||
st["skip_time"] = exec_state.get_skip_time();
|
||||
st["timeout"] = exec_state.get_timeout();
|
||||
|
||||
st["device"] = exec_state.get_device()->get_id();
|
||||
st["type_config_index"] = exec_state.get_type_config_index();
|
||||
|
||||
@@ -48,9 +48,20 @@
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <system_error>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#if __has_include(<filesystem>)
|
||||
#include <filesystem>
|
||||
namespace fs = std::filesystem;
|
||||
#elif __has_include(<experimental/filesystem>)
|
||||
#include <experimental/filesystem>
|
||||
namespace fs = std::experimental::filesystem;
|
||||
#else
|
||||
#error "No <filesystem> or <experimental/filesystem> found."
|
||||
#endif
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@@ -115,6 +126,18 @@ catch (const std::exception &)
|
||||
|
||||
void parse(std::string_view input, std::string &val) { val = input; }
|
||||
|
||||
// Creates every missing directory on the way to the file named by `spec`, so the file can be
// opened afterwards. A `spec` without a parent component (bare filename or empty string) is a
// no-op. Uses the throwing overload of fs::create_directories, so failures surface as exceptions.
void create_output_parent_directories(const std::string &spec)
{
  const fs::path containing_dir = fs::path{spec}.parent_path();
  if (!containing_dir.empty())
  {
    fs::create_directories(containing_dir);
  }
}
|
||||
|
||||
// Parses a list of values "<val1>, <val2>, <val3>, ..." into a vector:
|
||||
template <typename T>
|
||||
std::vector<T> parse_list_values(std::string_view list_spec)
|
||||
@@ -526,14 +549,14 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
|
||||
this->update_axis(first[1]);
|
||||
first += 2;
|
||||
}
|
||||
else if (arg == "--min-samples")
|
||||
else if (arg == "--min-samples" || arg == "--cold-warmup-runs")
|
||||
{
|
||||
check_params(1);
|
||||
this->update_int64_prop(first[0], first[1]);
|
||||
first += 2;
|
||||
}
|
||||
else if (arg == "--skip-time" || arg == "--timeout" || arg == "--throttle-threshold" ||
|
||||
arg == "--throttle-recovery-delay")
|
||||
else if (arg == "--skip-time" || arg == "--timeout" || arg == "--cold-max-warmup-walltime" ||
|
||||
arg == "--throttle-threshold" || arg == "--throttle-recovery-delay")
|
||||
{
|
||||
check_params(1);
|
||||
this->update_float64_prop(first[0], first[1]);
|
||||
@@ -622,6 +645,8 @@ std::ostream &option_parser::printer_spec_to_ostream(const std::string &spec)
|
||||
}
|
||||
else // spec is a filename:
|
||||
{
|
||||
::create_output_parent_directories(spec);
|
||||
|
||||
auto file_stream = std::make_unique<std::ofstream>();
|
||||
// Throw if file can't open
|
||||
file_stream->exceptions(file_stream->exceptions() | std::ios::failbit);
|
||||
@@ -990,6 +1015,10 @@ try
|
||||
{
|
||||
bench.set_min_samples(value);
|
||||
}
|
||||
else if (prop_arg == "--cold-warmup-runs")
|
||||
{
|
||||
bench.set_cold_warmup_runs(value);
|
||||
}
|
||||
else
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error, "Unrecognized property: `{}`", prop_arg);
|
||||
@@ -1103,6 +1132,10 @@ try
|
||||
{
|
||||
bench.set_timeout(value);
|
||||
}
|
||||
else if (prop_arg == "--cold-max-warmup-walltime")
|
||||
{
|
||||
bench.set_cold_max_warmup_walltime(value);
|
||||
}
|
||||
else if (prop_arg == "--throttle-threshold")
|
||||
{
|
||||
bench.set_throttle_threshold(static_cast<nvbench::float32_t>(value) / 100.0f);
|
||||
|
||||
@@ -152,6 +152,26 @@ struct state
|
||||
void set_min_samples(nvbench::int64_t min_samples) { m_min_samples = min_samples; }
|
||||
/// @}
|
||||
|
||||
/// Execute up to this many warmup runs (minimum 1) before collecting cold measurement samples. @{
|
||||
[[nodiscard]] nvbench::int64_t get_cold_warmup_runs() const
{
  // Current cold warmup run count for this state.
  return m_cold_warmup_runs;
}
|
||||
void set_cold_warmup_runs(nvbench::int64_t cold_warmup_runs)
{
  // Values below 1 (zero or negative) are stored as 1.
  constexpr nvbench::int64_t min_warmup_runs{1};
  m_cold_warmup_runs = cold_warmup_runs < min_warmup_runs ? min_warmup_runs : cold_warmup_runs;
}
|
||||
/// @}
|
||||
|
||||
/// Stop cold warmups after this many seconds of walltime. Values <= 0 disable the limit. @{
|
||||
[[nodiscard]] nvbench::float64_t get_cold_max_warmup_walltime() const
{
  // Walltime limit, in seconds, for cold warmup runs of this state.
  return this->m_cold_max_warmup_walltime;
}
|
||||
void set_cold_max_warmup_walltime(nvbench::float64_t cold_max_warmup_walltime)
{
  // The value is stored as given; no clamping or validation happens here.
  this->m_cold_max_warmup_walltime = cold_max_warmup_walltime;
}
|
||||
/// @}
|
||||
|
||||
[[nodiscard]] const nvbench::criterion_params &get_criterion_params() const
|
||||
{
|
||||
return m_criterion_params;
|
||||
@@ -332,7 +352,9 @@ private:
|
||||
std::string m_stopping_criterion;
|
||||
|
||||
nvbench::int64_t m_min_samples;
|
||||
nvbench::int64_t m_cold_warmup_runs;
|
||||
|
||||
nvbench::float64_t m_cold_max_warmup_walltime;
|
||||
nvbench::float64_t m_skip_time;
|
||||
nvbench::float64_t m_timeout;
|
||||
|
||||
|
||||
@@ -39,6 +39,8 @@ state::state(const benchmark_base &bench)
|
||||
, m_criterion_params{bench.get_criterion_params()}
|
||||
, m_stopping_criterion(bench.get_stopping_criterion())
|
||||
, m_min_samples{bench.get_min_samples()}
|
||||
, m_cold_warmup_runs{bench.get_cold_warmup_runs()}
|
||||
, m_cold_max_warmup_walltime{bench.get_cold_max_warmup_walltime()}
|
||||
, m_skip_time{bench.get_skip_time()}
|
||||
, m_timeout{bench.get_timeout()}
|
||||
, m_throttle_threshold{bench.get_throttle_threshold()}
|
||||
@@ -61,6 +63,8 @@ state::state(const benchmark_base &bench,
|
||||
, m_criterion_params{bench.get_criterion_params()}
|
||||
, m_stopping_criterion(bench.get_stopping_criterion())
|
||||
, m_min_samples{bench.get_min_samples()}
|
||||
, m_cold_warmup_runs{bench.get_cold_warmup_runs()}
|
||||
, m_cold_max_warmup_walltime{bench.get_cold_max_warmup_walltime()}
|
||||
, m_skip_time{bench.get_skip_time()}
|
||||
, m_timeout{bench.get_timeout()}
|
||||
, m_throttle_threshold{bench.get_throttle_threshold()}
|
||||
|
||||
@@ -22,6 +22,21 @@
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <system_error>
|
||||
|
||||
#if __has_include(<filesystem>)
|
||||
#include <filesystem>
|
||||
namespace fs = std::filesystem;
|
||||
#elif __has_include(<experimental/filesystem>)
|
||||
#include <experimental/filesystem>
|
||||
namespace fs = std::experimental::filesystem;
|
||||
#else
|
||||
#error "No <filesystem> or <experimental/filesystem> found."
|
||||
#endif
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
//==============================================================================
|
||||
@@ -49,6 +64,40 @@ NVBENCH_BENCH_TYPES(TestBench, NVBENCH_TYPE_AXES(Ts, Us))
|
||||
namespace
|
||||
{
|
||||
|
||||
struct temp_tree
|
||||
{
|
||||
explicit temp_tree(fs::path root)
|
||||
: root_path{std::move(root)}
|
||||
{
|
||||
std::error_code ec;
|
||||
fs::remove_all(root_path, ec);
|
||||
if (ec)
|
||||
{
|
||||
throw std::runtime_error{fmt::format("Failed to remove temporary directory `{}`: {}",
|
||||
root_path.string(),
|
||||
ec.message())};
|
||||
}
|
||||
}
|
||||
|
||||
~temp_tree()
|
||||
{
|
||||
std::error_code ec;
|
||||
fs::remove_all(root_path, ec);
|
||||
if (ec)
|
||||
{
|
||||
std::cerr << "Failed to remove temporary directory `" << root_path.string()
|
||||
<< "`: " << ec.message() << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
temp_tree(const temp_tree &) = delete;
|
||||
temp_tree(temp_tree &&) = delete;
|
||||
temp_tree &operator=(const temp_tree &) = delete;
|
||||
temp_tree &operator=(temp_tree &&) = delete;
|
||||
|
||||
fs::path root_path;
|
||||
};
|
||||
|
||||
[[nodiscard]] std::string states_to_string(const std::vector<nvbench::state> &states)
|
||||
{
|
||||
fmt::memory_buffer buffer;
|
||||
@@ -1155,6 +1204,36 @@ void test_min_samples()
|
||||
ASSERT(states[0].get_min_samples() == 12345);
|
||||
}
|
||||
|
||||
void test_cold_warmup_runs()
|
||||
{
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse({"--benchmark", "DummyBench", "--cold-warmup-runs", "12345"});
|
||||
const auto &states = parser_to_states(parser);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(states[0].get_cold_warmup_runs() == 12345);
|
||||
}
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse({"--benchmark", "DummyBench", "--cold-warmup-runs", "0"});
|
||||
const auto &states = parser_to_states(parser);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(states[0].get_cold_warmup_runs() == 1);
|
||||
}
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse({"--benchmark", "DummyBench", "--cold-warmup-runs", "-12345"});
|
||||
const auto &states = parser_to_states(parser);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(states[0].get_cold_warmup_runs() == 1);
|
||||
}
|
||||
}
|
||||
|
||||
void test_skip_time()
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
@@ -1165,6 +1244,16 @@ void test_skip_time()
|
||||
ASSERT(std::abs(states[0].get_skip_time() - 12345e2) < 1.);
|
||||
}
|
||||
|
||||
void test_cold_max_warmup_walltime()
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse({"--benchmark", "DummyBench", "--cold-max-warmup-walltime", "12345e2"});
|
||||
const auto &states = parser_to_states(parser);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(std::abs(states[0].get_cold_max_warmup_walltime() - 12345e2) < 1.);
|
||||
}
|
||||
|
||||
void test_timeout()
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
@@ -1175,6 +1264,22 @@ void test_timeout()
|
||||
ASSERT(std::abs(states[0].get_timeout() - 12345e2) < 1.);
|
||||
}
|
||||
|
||||
void test_output_parent_directories_created()
|
||||
{
|
||||
const auto unique_suffix = std::chrono::steady_clock::now().time_since_epoch().count();
|
||||
const temp_tree temp{fs::temp_directory_path() /
|
||||
fmt::format("nvbench_option_parser_test_{}", unique_suffix)};
|
||||
const auto output_path = temp.root_path / "nested" / "results.json";
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse({"--json", output_path.string()});
|
||||
}
|
||||
|
||||
ASSERT(fs::is_directory(output_path.parent_path()));
|
||||
ASSERT(fs::exists(output_path));
|
||||
}
|
||||
|
||||
void test_stopping_criterion()
|
||||
{
|
||||
{ // Per benchmark criterion
|
||||
@@ -1466,8 +1571,11 @@ try
|
||||
test_axis_before_benchmark();
|
||||
|
||||
test_min_samples();
|
||||
test_cold_warmup_runs();
|
||||
test_skip_time();
|
||||
test_cold_max_warmup_walltime();
|
||||
test_timeout();
|
||||
test_output_parent_directories_created();
|
||||
|
||||
test_stopping_criterion();
|
||||
|
||||
|
||||
@@ -762,9 +762,11 @@ void test_devices()
|
||||
|
||||
void test_termination_criteria()
|
||||
{
|
||||
const nvbench::int64_t min_samples = 1000;
|
||||
const nvbench::float64_t skip_time = 4000;
|
||||
const nvbench::float64_t timeout = 5000;
|
||||
const nvbench::int64_t min_samples = 1000;
|
||||
const nvbench::int64_t cold_warmup_runs = 7;
|
||||
const nvbench::float64_t cold_max_warmup_walltime = 3000;
|
||||
const nvbench::float64_t skip_time = 4000;
|
||||
const nvbench::float64_t timeout = 5000;
|
||||
|
||||
// for comparing floats
|
||||
auto within_one = [](auto a, auto b) { return std::abs(a - b) < 1.; };
|
||||
@@ -772,6 +774,8 @@ void test_termination_criteria()
|
||||
dummy_bench bench;
|
||||
bench.set_devices(std::vector<int>{});
|
||||
bench.set_min_samples(min_samples);
|
||||
bench.set_cold_warmup_runs(cold_warmup_runs);
|
||||
bench.set_cold_max_warmup_walltime(cold_max_warmup_walltime);
|
||||
bench.set_skip_time(skip_time);
|
||||
bench.set_timeout(timeout);
|
||||
|
||||
@@ -779,6 +783,8 @@ void test_termination_criteria()
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(min_samples == states[0].get_min_samples());
|
||||
ASSERT(cold_warmup_runs == states[0].get_cold_warmup_runs());
|
||||
ASSERT(within_one(cold_max_warmup_walltime, states[0].get_cold_max_warmup_walltime()));
|
||||
ASSERT(within_one(skip_time, states[0].get_skip_time()));
|
||||
ASSERT(within_one(timeout, states[0].get_timeout()));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user