mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Merge pull request #113 from elstehle/fix/per-device-stream
Fixes cudaErrorInvalidValue when running on nvbench-created cuda stream
This commit is contained in:
@@ -19,10 +19,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/detail/device_scope.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
@@ -39,18 +42,36 @@ namespace nvbench
|
||||
struct cuda_stream
|
||||
{
|
||||
/**
|
||||
* Constructs a cuda_stream that owns a new stream, created with
|
||||
* `cudaStreamCreate`.
|
||||
* Constructs a cuda_stream that owns a new stream, created with `cudaStreamCreate`.
|
||||
*
|
||||
* @param device The device that this stream should be associated with. If no device is provided,
|
||||
* the stream will be associated with the device that is active at the call time.
|
||||
*/
|
||||
cuda_stream()
|
||||
: m_stream{[]() {
|
||||
explicit cuda_stream(std::optional<nvbench::device_info> device)
|
||||
: m_stream{[device]() {
|
||||
cudaStream_t s;
|
||||
NVBENCH_CUDA_CALL(cudaStreamCreate(&s));
|
||||
if (device.has_value())
|
||||
{
|
||||
nvbench::detail::device_scope scope_guard{device.value().get_id()};
|
||||
NVBENCH_CUDA_CALL(cudaStreamCreate(&s));
|
||||
}
|
||||
else
|
||||
{
|
||||
NVBENCH_CUDA_CALL(cudaStreamCreate(&s));
|
||||
}
|
||||
return s;
|
||||
}(),
|
||||
stream_deleter{true}}
|
||||
{}
|
||||
|
||||
/**
|
||||
* @brief Constructs a new cuda_stream tha is associated with the device that is active at the
|
||||
* call time.
|
||||
*/
|
||||
cuda_stream()
|
||||
: cuda_stream(std::nullopt)
|
||||
{}
|
||||
|
||||
/**
|
||||
* Constructs a `cuda_stream` from an explicit cudaStream_t.
|
||||
*
|
||||
|
||||
@@ -37,7 +37,7 @@ namespace nvbench::detail
|
||||
|
||||
measure_cold_base::measure_cold_base(state &exec_state)
|
||||
: m_state{exec_state}
|
||||
, m_launch{m_state.get_cuda_stream()}
|
||||
, m_launch{exec_state.get_cuda_stream()}
|
||||
, m_criterion_params{exec_state.get_criterion_params()}
|
||||
, m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(
|
||||
exec_state.get_stopping_criterion())}
|
||||
|
||||
@@ -165,7 +165,7 @@ measure_cupti_base::measure_cupti_base(state &exec_state)
|
||||
// (formatter doesn't handle `try :` very well...)
|
||||
try
|
||||
: m_state{exec_state}
|
||||
, m_launch{m_state.get_cuda_stream()}
|
||||
, m_launch{exec_state.get_cuda_stream()}
|
||||
, m_cupti{*m_state.get_device(), add_metrics(m_state)}
|
||||
{}
|
||||
// clang-format on
|
||||
|
||||
@@ -36,7 +36,7 @@ namespace nvbench::detail
|
||||
|
||||
measure_hot_base::measure_hot_base(state &exec_state)
|
||||
: m_state{exec_state}
|
||||
, m_launch{m_state.get_cuda_stream()}
|
||||
, m_launch{exec_state.get_cuda_stream()}
|
||||
, m_min_samples{exec_state.get_min_samples()}
|
||||
, m_min_time{exec_state.get_min_time()}
|
||||
, m_skip_time{exec_state.get_skip_time()}
|
||||
|
||||
@@ -64,7 +64,21 @@ struct state
|
||||
state &operator=(const state &) = delete;
|
||||
state &operator=(state &&) = default;
|
||||
|
||||
[[nodiscard]] const nvbench::cuda_stream &get_cuda_stream() const { return m_cuda_stream; }
|
||||
/// If a stream exists, return that. Otherwise, create a new stream using the current
|
||||
/// device (or the current device if none is set), save it, and return it.
|
||||
/// @sa get_cuda_stream_optional
|
||||
[[nodiscard]] nvbench::cuda_stream &get_cuda_stream()
|
||||
{
|
||||
if (!m_cuda_stream.has_value())
|
||||
{
|
||||
m_cuda_stream = nvbench::cuda_stream{m_device};
|
||||
}
|
||||
return m_cuda_stream.value();
|
||||
}
|
||||
[[nodiscard]] const std::optional<nvbench::cuda_stream> &get_cuda_stream_optional() const
|
||||
{
|
||||
return m_cuda_stream;
|
||||
}
|
||||
void set_cuda_stream(nvbench::cuda_stream &&stream) { m_cuda_stream = std::move(stream); }
|
||||
|
||||
/// The CUDA device associated with with this benchmark state. May be
|
||||
@@ -313,7 +327,6 @@ private:
|
||||
std::optional<nvbench::device_info> device,
|
||||
std::size_t type_config_index);
|
||||
|
||||
nvbench::cuda_stream m_cuda_stream;
|
||||
std::reference_wrapper<const nvbench::benchmark_base> m_benchmark;
|
||||
nvbench::named_values m_axis_values;
|
||||
std::optional<nvbench::device_info> m_device;
|
||||
@@ -334,6 +347,8 @@ private:
|
||||
nvbench::float32_t m_throttle_threshold; // [% of default SM clock rate]
|
||||
nvbench::float32_t m_throttle_recovery_delay; // [seconds]
|
||||
|
||||
std::optional<nvbench::cuda_stream> m_cuda_stream;
|
||||
|
||||
// Deadlock protection. See blocking_kernel's class doc for details.
|
||||
nvbench::float64_t m_blocking_kernel_timeout{30.0};
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@ state::state(const benchmark_base &bench)
|
||||
, m_timeout{bench.get_timeout()}
|
||||
, m_throttle_threshold{bench.get_throttle_threshold()}
|
||||
, m_throttle_recovery_delay{bench.get_throttle_recovery_delay()}
|
||||
, m_cuda_stream{std::nullopt}
|
||||
{}
|
||||
|
||||
state::state(const benchmark_base &bench,
|
||||
@@ -62,6 +63,7 @@ state::state(const benchmark_base &bench,
|
||||
, m_timeout{bench.get_timeout()}
|
||||
, m_throttle_threshold{bench.get_throttle_threshold()}
|
||||
, m_throttle_recovery_delay{bench.get_throttle_recovery_delay()}
|
||||
, m_cuda_stream{std::nullopt}
|
||||
{}
|
||||
|
||||
nvbench::int64_t state::get_int64(const std::string &axis_name) const
|
||||
|
||||
@@ -3,6 +3,7 @@ set(test_srcs
|
||||
benchmark.cu
|
||||
create.cu
|
||||
cuda_timer.cu
|
||||
cuda_stream.cu
|
||||
cpu_timer.cu
|
||||
criterion_manager.cu
|
||||
criterion_params.cu
|
||||
|
||||
77
testing/cuda_stream.cu
Normal file
77
testing/cuda_stream.cu
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/config.cuh>
|
||||
#include <nvbench/cuda_stream.cuh>
|
||||
#include <nvbench/device_manager.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
namespace
|
||||
{
|
||||
#ifdef NVBENCH_HAS_CUPTI
|
||||
/**
|
||||
* @brief Queries and returns the device id that the given \p cuda_stream is associated with
|
||||
*
|
||||
* @param cuda_stream The stream to get the device id for
|
||||
* @return The device id that \p cuda_stream is associated with
|
||||
*/
|
||||
int get_device_of_stream(cudaStream_t cuda_stream)
|
||||
{
|
||||
CUcontext ctx;
|
||||
NVBENCH_DRIVER_API_CALL(cuStreamGetCtx(CUstream{cuda_stream}, &ctx));
|
||||
NVBENCH_DRIVER_API_CALL(cuCtxPushCurrent(ctx));
|
||||
CUdevice device_id{};
|
||||
NVBENCH_DRIVER_API_CALL(cuCtxGetDevice(&device_id));
|
||||
NVBENCH_DRIVER_API_CALL(cuCtxPopCurrent(&ctx));
|
||||
return static_cast<int>(device_id);
|
||||
}
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
void test_basic()
|
||||
{
|
||||
#ifdef NVBENCH_HAS_CUPTI
|
||||
// Get devices
|
||||
auto devices = nvbench::device_manager::get().get_devices();
|
||||
|
||||
// Iterate over devices
|
||||
for (auto const &device_info : devices)
|
||||
{
|
||||
// Create stream on the device before it becomes the active device
|
||||
nvbench::cuda_stream device_stream(device_info);
|
||||
|
||||
// Verify cuda stream is associated with the correct cuda device
|
||||
ASSERT(get_device_of_stream(device_stream.get_stream()) == device_info.get_id());
|
||||
|
||||
// Set the device as active device
|
||||
device_info.set_active();
|
||||
|
||||
// Create the stream (implicitly) on the device that is currently active
|
||||
nvbench::cuda_stream current_device_stream{};
|
||||
|
||||
// Verify the cuda stream was in fact associated with the currently active device
|
||||
ASSERT(get_device_of_stream(current_device_stream.get_stream()) == device_info.get_id());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
int main() { test_basic(); }
|
||||
@@ -55,9 +55,13 @@ void test_streams()
|
||||
|
||||
state_tester state{bench};
|
||||
|
||||
// Confirm that the stream hasn't been initialized yet
|
||||
ASSERT(!state.get_cuda_stream_optional().has_value());
|
||||
|
||||
// Test non-owning stream
|
||||
cudaStream_t default_stream = 0;
|
||||
state.set_cuda_stream(nvbench::cuda_stream{default_stream, false});
|
||||
ASSERT(state.get_cuda_stream_optional() == default_stream);
|
||||
ASSERT(state.get_cuda_stream() == default_stream);
|
||||
|
||||
// Test owning stream
|
||||
|
||||
Reference in New Issue
Block a user