/*
 * Copyright 2021 NVIDIA Corporation
 *
 * Licensed under the Apache License, Version 2.0 with the LLVM exception
 * (the "License"); you may not use this file except in compliance with
 * the License.
 *
 * You may obtain a copy of the License at
 *
 *     http://llvm.org/foundation/relicensing/LICENSE.txt
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <nvbench/axes_metadata.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/state.cuh>
#include <nvbench/stopping_criterion.cuh>

#include <functional> // reference_wrapper, ref
#include <memory>
#include <optional>
#include <vector>

namespace nvbench
{

struct printer_base;
struct runner_base;

template <typename BenchmarkType>
struct runner;

/**
 * Holds runtime benchmark information and provides the public customization
 * API for the `NVBENCH_BENCH` macros.
 *
 * Delegates responsibility to the following classes:
 * - nvbench::axes_metadata: Axis specifications.
 */
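// A minimal usage sketch, assuming a benchmark function registered via
// NVBENCH_BENCH (names are illustrative). Each setter returns `*this`,
// so customizations chain:
//
//   void copy_bench(nvbench::state &state) { /* ... */ }
//   NVBENCH_BENCH(copy_bench)
//     .set_name("copy_sweep")
//     .add_int64_power_of_two_axis("Size", {16, 20, 24})
//     .set_timeout(30.);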
struct benchmark_base
{
  template <typename T>
  using optional_ref = std::optional<std::reference_wrapper<T>>;

  template <typename TypeAxes>
  explicit benchmark_base(TypeAxes type_axes)
      : m_axes(type_axes)
  {
    this->set_stopping_criterion(nvbench::detail::default_stopping_criterion());
  }

  virtual ~benchmark_base();

  /**
   * Returns a pointer to a new instance of the concrete benchmark<...>
   * subclass.
   *
   * The result will have the same name and axes as the source benchmark.
   * The `get_states()` vector of the result will always be empty.
   */
  [[nodiscard]] std::unique_ptr<benchmark_base> clone() const;

  benchmark_base &set_name(std::string name)
  {
    m_name = std::move(name);
    return *this;
  }

  [[nodiscard]] const std::string &get_name() const { return m_name; }

  benchmark_base &set_type_axes_names(std::vector<std::string> names)
  {
    this->do_set_type_axes_names(std::move(names));
    return *this;
  }

  benchmark_base &add_float64_axis(std::string name, std::vector<nvbench::float64_t> data)
  {
    m_axes.add_float64_axis(std::move(name), std::move(data));
    return *this;
  }

  benchmark_base &add_int64_axis(std::string name,
                                 std::vector<nvbench::int64_t> data,
                                 nvbench::int64_axis_flags flags = nvbench::int64_axis_flags::none)
  {
    m_axes.add_int64_axis(std::move(name), std::move(data), flags);
    return *this;
  }

  benchmark_base &add_int64_power_of_two_axis(std::string name, std::vector<nvbench::int64_t> data)
  {
    return this->add_int64_axis(std::move(name),
                                std::move(data),
                                nvbench::int64_axis_flags::power_of_two);
  }

  benchmark_base &add_string_axis(std::string name, std::vector<std::string> data)
  {
    m_axes.add_string_axis(std::move(name), std::move(data));
    return *this;
  }
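
  // A sketch of an axis sweep (axis names are illustrative). The benchmark
  // runs one config per element of the Cartesian product of all axis values,
  // so this example yields 3 * 2 * 2 = 12 configs:
  //
  //   bench.add_int64_power_of_two_axis("NumInputs", {10, 15, 20})
  //        .add_float64_axis("Quality", {0.5, 1.0})
  //        .add_string_axis("Mode", {"sorted", "random"});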

  benchmark_base &set_devices(std::vector<int> device_ids);

  benchmark_base &set_devices(std::vector<nvbench::device_info> devices)
  {
    m_devices = std::move(devices);
    return *this;
  }

  benchmark_base &clear_devices()
  {
    m_devices.clear();
    return *this;
  }

  benchmark_base &add_device(int device_id);

  benchmark_base &add_device(nvbench::device_info device)
  {
    m_devices.push_back(std::move(device));
    return *this;
  }
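
  // Device-targeting sketch (the device id is illustrative; without an
  // explicit selection, nvbench typically runs each config on every detected
  // device):
  //
  //   bench.clear_devices(); // drop any prior selection
  //   bench.add_device(0);   // pin the benchmark to CUDA device 0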

  [[nodiscard]] const std::vector<nvbench::device_info> &get_devices() const { return m_devices; }

  [[nodiscard]] nvbench::axes_metadata &get_axes() { return m_axes; }

  [[nodiscard]] const nvbench::axes_metadata &get_axes() const { return m_axes; }

  // Computes the number of configs in the benchmark.
  // Unlike get_states().size(), this method may be used prior to calling run().
  [[nodiscard]] std::size_t get_config_count() const;

  // Is empty until run() is called.
  [[nodiscard]] const std::vector<nvbench::state> &get_states() const { return m_states; }
  [[nodiscard]] std::vector<nvbench::state> &get_states() { return m_states; }

  void run() { this->do_run(); }
  void run_or_skip(bool &skip_remaining) { this->do_run_or_skip(skip_remaining); }

  void set_printer(nvbench::printer_base &printer) { m_printer = std::ref(printer); }

  void clear_printer() { m_printer = std::nullopt; }

  [[nodiscard]] optional_ref<nvbench::printer_base> get_printer() const { return m_printer; }

  /// Execute at least this many trials per measurement. @{
  [[nodiscard]] nvbench::int64_t get_min_samples() const { return m_min_samples; }
  benchmark_base &set_min_samples(nvbench::int64_t min_samples)
  {
    m_min_samples = min_samples;
    return *this;
  }
  /// @}

  /// If true, the benchmark measurements only record CPU time and assume no
  /// GPU work is performed.
  /// @{
  [[nodiscard]] bool get_is_cpu_only() const { return m_is_cpu_only; }
  benchmark_base &set_is_cpu_only(bool is_cpu_only)
  {
    m_is_cpu_only = is_cpu_only;
    return *this;
  }
  /// @}

  /// If true, the benchmark is only run once, skipping all warmup runs and
  /// only executing a single non-batched measurement. This is intended for
  /// use with external profiling tools. @{
  [[nodiscard]] bool get_run_once() const { return m_run_once; }
  benchmark_base &set_run_once(bool v)
  {
    m_run_once = v;
    return *this;
  }
  /// @}

  /// If true, the batched measurements for this benchmark are not run. This
  /// is intended to save resources when only the non-batched measurements are
  /// of interest, even though the batched measurements would be meaningful
  /// and the code to exercise them is compiled. This option has no effect for
  /// CPU-only benchmarks or for benchmarks tagged with the no_batch tag. @{
  [[nodiscard]] bool get_skip_batched() const { return m_skip_batched; }
  benchmark_base &set_skip_batched(bool v)
  {
    m_skip_batched = v;
    return *this;
  }
  /// @}

  /// If true, the benchmark does not use the blocking_kernel. This is intended
  /// for use with external profiling tools. @{
  [[nodiscard]] bool get_disable_blocking_kernel() const { return m_disable_blocking_kernel; }
  benchmark_base &set_disable_blocking_kernel(bool v)
  {
    m_disable_blocking_kernel = v;
    return *this;
  }
  /// @}
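
  // A sketch of a common setup for external profilers such as Nsight Compute,
  // assuming you want exactly one unobstructed launch per config (combining
  // the two options documented above):
  //
  //   bench.set_run_once(true).set_disable_blocking_kernel(true);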

  /// If a warmup run finishes in less than `skip_time`, the measurement will
  /// be skipped.
  /// Extremely fast kernels (< 5000 ns) often time out before they can
  /// accumulate `min_time` measurements, and are often uninteresting. Setting
  /// this value can help improve performance by skipping time-consuming
  /// measurements that don't provide much information.
  /// Default value is -1., which disables the feature.
  /// @{
  [[nodiscard]] nvbench::float64_t get_skip_time() const { return m_skip_time; }
  benchmark_base &set_skip_time(nvbench::float64_t skip_time)
  {
    m_skip_time = skip_time;
    return *this;
  }
  /// @}

  /// If a measurement takes more than `timeout` seconds to complete, stop the
  /// measurement early. A warning should be printed if this happens.
  /// This setting overrides all other termination criteria.
  /// Note that this is measured in CPU walltime, not sample time.
  /// @{
  [[nodiscard]] nvbench::float64_t get_timeout() const { return m_timeout; }
  benchmark_base &set_timeout(nvbench::float64_t timeout)
  {
    m_timeout = timeout;
    return *this;
  }
  /// @}

  [[nodiscard]] nvbench::float32_t get_throttle_threshold() const { return m_throttle_threshold; }

  benchmark_base &set_throttle_threshold(nvbench::float32_t throttle_threshold)
  {
    m_throttle_threshold = throttle_threshold;
    return *this;
  }

  [[nodiscard]] nvbench::float32_t get_throttle_recovery_delay() const
  {
    return m_throttle_recovery_delay;
  }

  benchmark_base &set_throttle_recovery_delay(nvbench::float32_t throttle_recovery_delay)
  {
    m_throttle_recovery_delay = throttle_recovery_delay;
    return *this;
  }

  /// Control the stopping criterion for the measurement loop.
  /// @{
  [[nodiscard]] const std::string &get_stopping_criterion() const { return m_stopping_criterion; }
  benchmark_base &set_stopping_criterion(std::string criterion);
  /// @}

  [[nodiscard]] bool has_criterion_param(const std::string &name) const
  {
    return m_criterion_params.has_value(name);
  }

  [[nodiscard]] nvbench::int64_t get_criterion_param_int64(const std::string &name) const
  {
    return m_criterion_params.get_int64(name);
  }
  benchmark_base &set_criterion_param_int64(const std::string &name, nvbench::int64_t value)
  {
    m_criterion_params.set_int64(name, value);
    return *this;
  }

  [[nodiscard]] nvbench::float64_t get_criterion_param_float64(const std::string &name) const
  {
    return m_criterion_params.get_float64(name);
  }
  benchmark_base &set_criterion_param_float64(const std::string &name, nvbench::float64_t value)
  {
    m_criterion_params.set_float64(name, value);
    return *this;
  }

  [[nodiscard]] std::string get_criterion_param_string(const std::string &name) const
  {
    return m_criterion_params.get_string(name);
  }
  benchmark_base &set_criterion_param_string(const std::string &name, std::string value)
  {
    m_criterion_params.set_string(name, std::move(value));
    return *this;
  }

  [[nodiscard]] nvbench::criterion_params &get_criterion_params() { return m_criterion_params; }
  [[nodiscard]] const nvbench::criterion_params &get_criterion_params() const
  {
    return m_criterion_params;
  }
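
  // A sketch of selecting a stopping criterion and tuning a parameter. The
  // criterion name "stdrel" and the "max-noise" parameter are assumptions
  // about the registered criteria; the valid names depend on the build:
  //
  //   bench.set_stopping_criterion("stdrel");
  //   bench.set_criterion_param_float64("max-noise", 0.005); // 0.5% rel. noise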

protected:
  friend struct nvbench::runner_base;

  template <typename BenchmarkType>
  friend struct nvbench::runner;

  std::string m_name;
  nvbench::axes_metadata m_axes;
  std::vector<nvbench::device_info> m_devices;
  std::vector<nvbench::state> m_states;

  optional_ref<nvbench::printer_base> m_printer;

  bool m_is_cpu_only{false};
  bool m_run_once{false};
  bool m_disable_blocking_kernel{false};
  bool m_skip_batched{false};

  nvbench::int64_t m_min_samples{10};

  nvbench::float64_t m_skip_time{-1.};
  nvbench::float64_t m_timeout{15.};

  nvbench::float32_t m_throttle_threshold{0.75f};      // [fraction of default SM clock rate]
  nvbench::float32_t m_throttle_recovery_delay{0.05f}; // [seconds]

  nvbench::criterion_params m_criterion_params;
  std::string m_stopping_criterion{};

private:
  // Route these through virtuals so the templated subclass can inject type info:
  virtual std::unique_ptr<benchmark_base> do_clone() const = 0;
  virtual void do_set_type_axes_names(std::vector<std::string> names) = 0;
  virtual void do_run() = 0;
  virtual void do_run_or_skip(bool &skip_remaining) = 0;
};

} // namespace nvbench