mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-04-20 14:58:54 +00:00
Add a cuda stream member to nvbench::state
This commit is contained in:
@@ -29,14 +29,14 @@ namespace nvbench
|
||||
struct cuda_stream
|
||||
{
|
||||
cuda_stream()
|
||||
: m_owning(true)
|
||||
: m_owning{true}
|
||||
{
|
||||
NVBENCH_CUDA_CALL(cudaStreamCreate(&m_stream));
|
||||
}
|
||||
|
||||
cuda_stream(cuda_stream stream, bool owning)
|
||||
: m_stream(stream)
|
||||
, m_owning(owning)
|
||||
: m_stream{stream}
|
||||
, m_owning{owning}
|
||||
{}
|
||||
|
||||
// destroy the stream if it's owning
|
||||
@@ -55,8 +55,8 @@ struct cuda_stream
|
||||
cuda_stream &operator=(const cuda_stream &) = delete;
|
||||
|
||||
cuda_stream(cuda_stream &&other)
|
||||
: m_stream(other.get_stream())
|
||||
, m_owning(other.is_owning())
|
||||
: m_stream{other.get_stream()}
|
||||
, m_owning{other.is_owning()}
|
||||
{
|
||||
other.destroy();
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/cuda_stream.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/exec_tag.cuh>
|
||||
#include <nvbench/named_values.cuh>
|
||||
@@ -62,6 +63,15 @@ struct state
|
||||
state &operator=(const state &) = delete;
|
||||
state &operator=(state &&) = default;
|
||||
|
||||
[[nodiscard]] const nvbench::cuda_stream &get_cuda_stream() const
|
||||
{
|
||||
return m_cuda_stream;
|
||||
}
|
||||
void set_cuda_stream(nvbench::cuda_stream &&stream)
|
||||
{
|
||||
m_cuda_stream = std::move(stream);
|
||||
}
|
||||
|
||||
/// The CUDA device associated with this benchmark state. May be
|
||||
/// nullopt for CPU-only benchmarks.
|
||||
[[nodiscard]] const std::optional<nvbench::device_info> &get_device() const
|
||||
@@ -259,11 +269,9 @@ struct state
|
||||
|
||||
[[nodiscard]] bool is_cupti_required() const
|
||||
{
|
||||
return is_l2_hit_rate_collected()
|
||||
|| is_l1_hit_rate_collected()
|
||||
|| is_stores_efficiency_collected()
|
||||
|| is_loads_efficiency_collected()
|
||||
|| is_dram_throughput_collected();
|
||||
return is_l2_hit_rate_collected() || is_l1_hit_rate_collected() ||
|
||||
is_stores_efficiency_collected() ||
|
||||
is_loads_efficiency_collected() || is_dram_throughput_collected();
|
||||
}
|
||||
|
||||
summary &add_summary(std::string summary_tag);
|
||||
@@ -303,6 +311,7 @@ private:
|
||||
std::optional<nvbench::device_info> device,
|
||||
std::size_t type_config_index);
|
||||
|
||||
nvbench::cuda_stream m_cuda_stream;
|
||||
std::reference_wrapper<const nvbench::benchmark_base> m_benchmark;
|
||||
nvbench::named_values m_axis_values;
|
||||
std::optional<nvbench::device_info> m_device;
|
||||
|
||||
@@ -33,7 +33,8 @@ namespace nvbench
|
||||
{
|
||||
|
||||
state::state(const benchmark_base &bench)
|
||||
: m_benchmark{bench}
|
||||
: m_cuda_stream{}
|
||||
, m_benchmark{bench}
|
||||
, m_run_once{bench.get_run_once()}
|
||||
, m_min_samples{bench.get_min_samples()}
|
||||
, m_min_time{bench.get_min_time()}
|
||||
@@ -46,7 +47,8 @@ state::state(const benchmark_base &bench,
|
||||
nvbench::named_values values,
|
||||
std::optional<nvbench::device_info> device,
|
||||
std::size_t type_config_index)
|
||||
: m_benchmark{bench}
|
||||
: m_cuda_stream{}
|
||||
, m_benchmark{bench}
|
||||
, m_axis_values{std::move(values)}
|
||||
, m_device{std::move(device)}
|
||||
, m_type_config_index{type_config_index}
|
||||
|
||||
Reference in New Issue
Block a user