mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-05-13 17:55:39 +00:00
Refactor measurement cleanup guards for testability
Extract hot stream cleanup and cold launch timer cleanup into reusable detail helpers. Keep measure_hot and measure_cold using those helpers through thin adapters so the tested cleanup logic matches the production path. Add driver-free cleanup guard tests using a fake measure object to verify cleanup ordering when exceptions occur after blocking stream setup, after hot unblock, and around cold GPU frequency start/stop paths.
This commit is contained in:
@@ -35,6 +35,7 @@
|
||||
#include <nvbench/detail/gpu_frequency.cuh>
|
||||
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
|
||||
#include <nvbench/detail/l2flush.cuh>
|
||||
#include <nvbench/detail/measure_cold_launch_timer_core.cuh>
|
||||
#include <nvbench/detail/statistics.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/exec_tag.cuh>
|
||||
@@ -146,48 +147,18 @@ protected:
|
||||
|
||||
struct measure_cold_base::kernel_launch_timer
|
||||
{
|
||||
private:
|
||||
__forceinline__ void cleanup_noexcept() noexcept;
|
||||
|
||||
struct cleanup_guard
|
||||
{
|
||||
explicit cleanup_guard(kernel_launch_timer &timer)
|
||||
: m_timer{timer}
|
||||
{}
|
||||
|
||||
cleanup_guard(const cleanup_guard &) = delete;
|
||||
cleanup_guard(cleanup_guard &&) = delete;
|
||||
cleanup_guard &operator=(const cleanup_guard &) = delete;
|
||||
cleanup_guard &operator=(cleanup_guard &&) = delete;
|
||||
|
||||
~cleanup_guard() noexcept
|
||||
{
|
||||
if (m_active)
|
||||
{
|
||||
m_timer.cleanup_noexcept();
|
||||
}
|
||||
}
|
||||
|
||||
void release() noexcept { m_active = false; }
|
||||
|
||||
private:
|
||||
kernel_launch_timer &m_timer;
|
||||
bool m_active{true};
|
||||
};
|
||||
|
||||
public:
|
||||
kernel_launch_timer(measure_cold_base &measure)
|
||||
: m_measure{measure}
|
||||
, m_disable_blocking_kernel{measure.m_disable_blocking_kernel}
|
||||
, m_run_once{measure.m_run_once}
|
||||
, m_check_throttling{measure.m_check_throttling}
|
||||
: kernel_launch_timer{measure,
|
||||
measure.m_disable_blocking_kernel,
|
||||
measure.m_run_once,
|
||||
measure.m_check_throttling}
|
||||
{}
|
||||
|
||||
explicit kernel_launch_timer(measure_cold_base &measure, bool disable_blocking_kernel)
|
||||
: m_measure{measure}
|
||||
, m_disable_blocking_kernel{disable_blocking_kernel}
|
||||
, m_run_once{measure.m_run_once}
|
||||
, m_check_throttling{measure.m_check_throttling}
|
||||
: kernel_launch_timer{measure,
|
||||
disable_blocking_kernel,
|
||||
measure.m_run_once,
|
||||
measure.m_check_throttling}
|
||||
{}
|
||||
|
||||
explicit kernel_launch_timer(measure_cold_base &measure,
|
||||
@@ -195,125 +166,69 @@ public:
|
||||
bool run_once,
|
||||
bool check_throttling)
|
||||
: m_measure{measure}
|
||||
, m_disable_blocking_kernel{disable_blocking_kernel}
|
||||
, m_run_once{run_once}
|
||||
, m_check_throttling{check_throttling}
|
||||
, m_core{*this, {disable_blocking_kernel, run_once, check_throttling}}
|
||||
{}
|
||||
|
||||
~kernel_launch_timer() noexcept { this->cleanup_noexcept(); }
|
||||
kernel_launch_timer(const kernel_launch_timer &) = delete;
|
||||
kernel_launch_timer(kernel_launch_timer &&) = delete;
|
||||
kernel_launch_timer &operator=(const kernel_launch_timer &) = delete;
|
||||
kernel_launch_timer &operator=(kernel_launch_timer &&) = delete;
|
||||
|
||||
__forceinline__ void start()
|
||||
~kernel_launch_timer() noexcept = default;
|
||||
|
||||
__forceinline__ void start() { m_core.start(); }
|
||||
|
||||
__forceinline__ void stop() { m_core.stop(); }
|
||||
|
||||
__forceinline__ void flush_device_l2() { m_measure.flush_device_l2(); }
|
||||
|
||||
__forceinline__ void sync_stream() { m_measure.sync_stream(); }
|
||||
|
||||
__forceinline__ cudaError_t sync_stream_noexcept() const noexcept
|
||||
{
|
||||
cleanup_guard cleanup{*this};
|
||||
|
||||
m_measure.flush_device_l2();
|
||||
m_measure.sync_stream();
|
||||
|
||||
// start CPU timer irrespective of use of blocking kernel
|
||||
// Ref: https://github.com/NVIDIA/nvbench/issues/249
|
||||
m_measure.m_cpu_timer.start();
|
||||
m_cpu_timer_started = true;
|
||||
|
||||
if (!m_disable_blocking_kernel)
|
||||
{
|
||||
// Arm cleanup before queueing the blocking kernel. If block_stream throws
|
||||
// after queueing work, cleanup_noexcept must still unblock the stream.
|
||||
m_stream_unblock_armed = true;
|
||||
m_measure.block_stream();
|
||||
}
|
||||
if (m_check_throttling)
|
||||
{
|
||||
// Arm cleanup before queueing timestamp work. If gpu_frequency_start
|
||||
// throws after queueing work, cleanup_noexcept must still sync the stream.
|
||||
m_gpu_frequency_cleanup_armed = true;
|
||||
m_measure.gpu_frequency_start();
|
||||
}
|
||||
if (m_run_once)
|
||||
{
|
||||
m_measure.profiler_start();
|
||||
m_profiler_started = true;
|
||||
}
|
||||
m_measure.m_cuda_timer.start(m_measure.m_launch.get_stream());
|
||||
m_cuda_timer_started = true;
|
||||
|
||||
cleanup.release();
|
||||
return m_measure.sync_stream_noexcept();
|
||||
}
|
||||
|
||||
__forceinline__ void stop()
|
||||
__forceinline__ void cpu_timer_start() noexcept { m_measure.m_cpu_timer.start(); }
|
||||
|
||||
__forceinline__ void cpu_timer_stop() noexcept { m_measure.m_cpu_timer.stop(); }
|
||||
|
||||
__forceinline__ void cpu_timer_stop_noexcept() noexcept { m_measure.m_cpu_timer.stop(); }
|
||||
|
||||
__forceinline__ void block_stream() { m_measure.block_stream(); }
|
||||
|
||||
__forceinline__ void unblock_stream() { m_measure.unblock_stream(); }
|
||||
|
||||
__forceinline__ void unblock_stream_noexcept() noexcept { m_measure.unblock_stream_noexcept(); }
|
||||
|
||||
__forceinline__ void gpu_frequency_start() { m_measure.gpu_frequency_start(); }
|
||||
|
||||
__forceinline__ void gpu_frequency_stop() { m_measure.gpu_frequency_stop(); }
|
||||
|
||||
__forceinline__ void profiler_start() { m_measure.profiler_start(); }
|
||||
|
||||
__forceinline__ void profiler_stop() { m_measure.profiler_stop(); }
|
||||
|
||||
__forceinline__ cudaError_t profiler_stop_noexcept() const noexcept
|
||||
{
|
||||
cleanup_guard cleanup{*this};
|
||||
return m_measure.profiler_stop_noexcept();
|
||||
}
|
||||
|
||||
if (m_cuda_timer_started)
|
||||
{
|
||||
m_measure.m_cuda_timer.stop(m_measure.m_launch.get_stream());
|
||||
m_cuda_timer_started = false;
|
||||
}
|
||||
if (m_gpu_frequency_cleanup_armed)
|
||||
{
|
||||
m_measure.gpu_frequency_stop();
|
||||
m_gpu_frequency_cleanup_armed = false;
|
||||
}
|
||||
if (m_stream_unblock_armed)
|
||||
{
|
||||
m_measure.unblock_stream();
|
||||
m_stream_unblock_armed = false;
|
||||
}
|
||||
m_measure.sync_stream();
|
||||
if (m_profiler_started)
|
||||
{
|
||||
m_measure.profiler_stop();
|
||||
m_profiler_started = false;
|
||||
}
|
||||
if (m_cpu_timer_started)
|
||||
{
|
||||
m_measure.m_cpu_timer.stop();
|
||||
m_cpu_timer_started = false;
|
||||
}
|
||||
__forceinline__ void cuda_timer_start()
|
||||
{
|
||||
m_measure.m_cuda_timer.start(m_measure.m_launch.get_stream());
|
||||
}
|
||||
|
||||
cleanup.release();
|
||||
__forceinline__ void cuda_timer_stop()
|
||||
{
|
||||
m_measure.m_cuda_timer.stop(m_measure.m_launch.get_stream());
|
||||
}
|
||||
|
||||
private:
|
||||
measure_cold_base &m_measure;
|
||||
bool m_disable_blocking_kernel;
|
||||
bool m_run_once;
|
||||
bool m_check_throttling;
|
||||
bool m_cpu_timer_started{false};
|
||||
bool m_stream_unblock_armed{false};
|
||||
bool m_gpu_frequency_cleanup_armed{false};
|
||||
bool m_profiler_started{false};
|
||||
bool m_cuda_timer_started{false};
|
||||
nvbench::detail::measure_cold_launch_timer_core<kernel_launch_timer> m_core;
|
||||
};
|
||||
|
||||
// Non-throwing teardown of whatever state flags are currently set on the
// timer. Order: unblock the stream (if armed), synchronize the stream (if any
// stream-side work was armed), stop the profiler, stop the CPU timer. Every
// flag is cleared by the time this returns.
__forceinline__ void measure_cold_base::kernel_launch_timer::cleanup_noexcept() noexcept
{
  // Decide whether a sync is needed before any flag is cleared below.
  bool needs_sync = m_cuda_timer_started || m_gpu_frequency_cleanup_armed;

  if (m_stream_unblock_armed)
  {
    needs_sync = true;
    m_measure.unblock_stream_noexcept();
    m_stream_unblock_armed = false;
  }

  if (needs_sync)
  {
    // The returned status is deliberately discarded.
    (void)m_measure.sync_stream_noexcept();
  }

  if (m_profiler_started)
  {
    (void)m_measure.profiler_stop_noexcept();
    m_profiler_started = false;
  }

  if (m_cpu_timer_started)
  {
    m_measure.m_cpu_timer.stop();
    m_cpu_timer_started = false;
  }

  // These two have no dedicated teardown call here; they are only reset.
  m_gpu_frequency_cleanup_armed = false;
  m_cuda_timer_started          = false;
}
|
||||
|
||||
template <typename KernelLauncher>
|
||||
struct measure_cold : public measure_cold_base
|
||||
{
|
||||
|
||||
183
nvbench/detail/measure_cold_launch_timer_core.cuh
Normal file
183
nvbench/detail/measure_cold_launch_timer_core.cuh
Normal file
@@ -0,0 +1,183 @@
|
||||
// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/config.cuh>
|
||||
|
||||
#if defined(NVBENCH_IMPLICIT_SYSTEM_HEADER_GCC)
|
||||
#pragma GCC system_header
|
||||
#elif defined(NVBENCH_IMPLICIT_SYSTEM_HEADER_CLANG)
|
||||
#pragma clang system_header
|
||||
#elif defined(NVBENCH_IMPLICIT_SYSTEM_HEADER_MSVC)
|
||||
#pragma system_header
|
||||
#endif
|
||||
|
||||
namespace nvbench::detail
|
||||
{
|
||||
|
||||
// Switches selecting which optional steps measure_cold_launch_timer_core runs.
struct measure_cold_launch_timer_config
{
  // start() calls Measure::block_stream() unless this is set.
  bool disable_blocking_kernel{false};
  // When set, start() calls Measure::profiler_start().
  bool run_once{false};
  // When set, start() calls Measure::gpu_frequency_start().
  bool check_throttling{true};
};

// Drives the start/stop call sequence of a cold measurement on a `Measure`
// object and, if any step throws, runs a non-throwing teardown of the steps
// that were already armed.
//
// `Measure` must provide: flush_device_l2, sync_stream, sync_stream_noexcept,
// cpu_timer_start, cpu_timer_stop, cpu_timer_stop_noexcept, block_stream,
// unblock_stream, unblock_stream_noexcept, gpu_frequency_start,
// gpu_frequency_stop, profiler_start, profiler_stop, profiler_stop_noexcept,
// cuda_timer_start and cuda_timer_stop.
template <typename Measure>
struct measure_cold_launch_timer_core
{
private:
  // Runs `step` only while `armed` is set and clears the flag afterwards. If
  // `step` throws, the flag stays set so cleanup_noexcept() still sees it.
  template <typename Step>
  static void disarm_after(bool &armed, Step step)
  {
    if (armed)
    {
      step();
      armed = false;
    }
  }

  // Non-throwing teardown. Order: unblock the stream, synchronize the stream,
  // stop the profiler, stop the CPU timer. All state flags end up cleared.
  void cleanup_noexcept() noexcept
  {
    // Decide whether a sync is needed before any flag is cleared below.
    bool needs_sync = m_cuda_timer_started || m_gpu_frequency_cleanup_armed;

    if (m_stream_unblock_armed)
    {
      needs_sync = true;
      m_measure.unblock_stream_noexcept();
      m_stream_unblock_armed = false;
    }

    if (needs_sync)
    {
      // The returned status is deliberately discarded.
      (void)m_measure.sync_stream_noexcept();
    }

    if (m_profiler_started)
    {
      (void)m_measure.profiler_stop_noexcept();
      m_profiler_started = false;
    }

    if (m_cpu_timer_started)
    {
      m_measure.cpu_timer_stop_noexcept();
      m_cpu_timer_started = false;
    }

    // These two have no dedicated teardown call here; they are only reset.
    m_gpu_frequency_cleanup_armed = false;
    m_cuda_timer_started          = false;
  }

  // Calls cleanup_noexcept() on the owning timer when destroyed, unless
  // dismiss() was called first.
  struct scoped_cleanup
  {
    explicit scoped_cleanup(measure_cold_launch_timer_core *owner)
        : m_owner{owner}
    {}

    scoped_cleanup(const scoped_cleanup &)            = delete;
    scoped_cleanup(scoped_cleanup &&)                 = delete;
    scoped_cleanup &operator=(const scoped_cleanup &) = delete;
    scoped_cleanup &operator=(scoped_cleanup &&)      = delete;

    ~scoped_cleanup() noexcept
    {
      if (m_owner != nullptr)
      {
        m_owner->cleanup_noexcept();
      }
    }

    void dismiss() noexcept { m_owner = nullptr; }

  private:
    measure_cold_launch_timer_core *m_owner;
  };

public:
  explicit measure_cold_launch_timer_core(Measure &measure, measure_cold_launch_timer_config config)
      : m_measure{measure}
      , m_config{config}
  {}

  measure_cold_launch_timer_core(const measure_cold_launch_timer_core &)            = delete;
  measure_cold_launch_timer_core(measure_cold_launch_timer_core &&)                 = delete;
  measure_cold_launch_timer_core &operator=(const measure_cold_launch_timer_core &) = delete;
  measure_cold_launch_timer_core &operator=(measure_cold_launch_timer_core &&)      = delete;

  // Tears down anything still armed (e.g. start() without a matching stop()).
  ~measure_cold_launch_timer_core() noexcept { this->cleanup_noexcept(); }

  // Runs the start sequence. If any step throws, everything armed so far is
  // torn down via cleanup_noexcept() before the exception leaves this call.
  void start()
  {
    scoped_cleanup on_failure{this};

    m_measure.flush_device_l2();
    m_measure.sync_stream();

    // The CPU timer is started whether or not the blocking kernel is used.
    // Ref: https://github.com/NVIDIA/nvbench/issues/249
    m_measure.cpu_timer_start();
    m_cpu_timer_started = true;

    if (!m_config.disable_blocking_kernel)
    {
      // The flag is raised before the call: if block_stream throws after
      // queueing work, cleanup_noexcept must still unblock the stream.
      m_stream_unblock_armed = true;
      m_measure.block_stream();
    }

    if (m_config.check_throttling)
    {
      // The flag is raised before the call: if gpu_frequency_start throws
      // after queueing work, cleanup_noexcept must still sync the stream.
      m_gpu_frequency_cleanup_armed = true;
      m_measure.gpu_frequency_start();
    }

    if (m_config.run_once)
    {
      m_measure.profiler_start();
      m_profiler_started = true;
    }

    m_measure.cuda_timer_start();
    m_cuda_timer_started = true;

    on_failure.dismiss();
  }

  // Runs the stop sequence for whatever start() armed. The stream sync in the
  // middle is unconditional. If any step throws, the remaining armed state is
  // torn down via cleanup_noexcept() before the exception leaves this call.
  void stop()
  {
    scoped_cleanup on_failure{this};

    disarm_after(m_cuda_timer_started, [this] { m_measure.cuda_timer_stop(); });
    disarm_after(m_gpu_frequency_cleanup_armed, [this] { m_measure.gpu_frequency_stop(); });
    disarm_after(m_stream_unblock_armed, [this] { m_measure.unblock_stream(); });

    m_measure.sync_stream();

    disarm_after(m_profiler_started, [this] { m_measure.profiler_stop(); });
    disarm_after(m_cpu_timer_started, [this] { m_measure.cpu_timer_stop(); });

    on_failure.dismiss();
  }

private:
  Measure &m_measure;
  measure_cold_launch_timer_config m_config;

  // State flags: each records a step that still needs undoing.
  bool m_cpu_timer_started{false};
  bool m_stream_unblock_armed{false};
  bool m_gpu_frequency_cleanup_armed{false};
  bool m_profiler_started{false};
  bool m_cuda_timer_started{false};
};
|
||||
|
||||
} // namespace nvbench::detail
|
||||
@@ -32,6 +32,7 @@
|
||||
#include <nvbench/cpu_timer.cuh>
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/cuda_timer.cuh>
|
||||
#include <nvbench/detail/stream_cleanup_guard.cuh>
|
||||
#include <nvbench/exec_tag.cuh>
|
||||
#include <nvbench/launch.cuh>
|
||||
|
||||
@@ -57,6 +58,8 @@ struct measure_hot_base
|
||||
measure_hot_base &operator=(measure_hot_base &&) = delete;
|
||||
|
||||
protected:
|
||||
friend struct nvbench::detail::stream_cleanup_guard<measure_hot_base>;
|
||||
|
||||
void check();
|
||||
|
||||
void initialize()
|
||||
@@ -82,60 +85,6 @@ protected:
|
||||
|
||||
__forceinline__ void sync_stream() const { NVBENCH_CUDA_CALL(this->sync_stream_noexcept()); }
|
||||
|
||||
struct stream_cleanup_guard
|
||||
{
|
||||
explicit stream_cleanup_guard(measure_hot_base &measure)
|
||||
: m_measure{measure}
|
||||
{
|
||||
m_sync_armed = true;
|
||||
}
|
||||
|
||||
stream_cleanup_guard(const stream_cleanup_guard &) = delete;
|
||||
stream_cleanup_guard(stream_cleanup_guard &&) = delete;
|
||||
stream_cleanup_guard &operator=(const stream_cleanup_guard &) = delete;
|
||||
stream_cleanup_guard &operator=(stream_cleanup_guard &&) = delete;
|
||||
|
||||
~stream_cleanup_guard() noexcept
|
||||
{
|
||||
if (m_unblock_armed)
|
||||
{
|
||||
m_measure.unblock_stream_noexcept();
|
||||
}
|
||||
if (m_sync_armed)
|
||||
{
|
||||
(void)m_measure.sync_stream_noexcept();
|
||||
}
|
||||
}
|
||||
|
||||
void block_stream()
|
||||
{
|
||||
// Arm cleanup before queueing the blocking kernel. If block_stream throws
|
||||
// after queueing work, the destructor must still unblock the stream.
|
||||
m_unblock_armed = true;
|
||||
m_measure.block_stream();
|
||||
}
|
||||
|
||||
void unblock()
|
||||
{
|
||||
if (m_unblock_armed)
|
||||
{
|
||||
m_measure.unblock_stream();
|
||||
m_unblock_armed = false;
|
||||
}
|
||||
}
|
||||
|
||||
void release() noexcept
|
||||
{
|
||||
m_unblock_armed = false;
|
||||
m_sync_armed = false;
|
||||
}
|
||||
|
||||
private:
|
||||
measure_hot_base &m_measure;
|
||||
bool m_unblock_armed{false};
|
||||
bool m_sync_armed{false};
|
||||
};
|
||||
|
||||
nvbench::state &m_state;
|
||||
|
||||
nvbench::launch m_launch;
|
||||
@@ -178,7 +127,7 @@ private:
|
||||
// measurement.
|
||||
void run_warmup()
|
||||
{
|
||||
stream_cleanup_guard cleanup{*this};
|
||||
nvbench::detail::stream_cleanup_guard<measure_hot_base> cleanup{*this};
|
||||
|
||||
m_cuda_timer.start(m_launch.get_stream());
|
||||
this->launch_kernel();
|
||||
@@ -204,7 +153,7 @@ private:
|
||||
{
|
||||
batch_size = std::max(batch_size, nvbench::int64_t{1});
|
||||
|
||||
stream_cleanup_guard cleanup{*this};
|
||||
nvbench::detail::stream_cleanup_guard<measure_hot_base> cleanup{*this};
|
||||
|
||||
if (!m_disable_blocking_kernel)
|
||||
{
|
||||
|
||||
74
nvbench/detail/stream_cleanup_guard.cuh
Normal file
74
nvbench/detail/stream_cleanup_guard.cuh
Normal file
@@ -0,0 +1,74 @@
|
||||
// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/config.cuh>
|
||||
|
||||
#if defined(NVBENCH_IMPLICIT_SYSTEM_HEADER_GCC)
|
||||
#pragma GCC system_header
|
||||
#elif defined(NVBENCH_IMPLICIT_SYSTEM_HEADER_CLANG)
|
||||
#pragma clang system_header
|
||||
#elif defined(NVBENCH_IMPLICIT_SYSTEM_HEADER_MSVC)
|
||||
#pragma system_header
|
||||
#endif
|
||||
|
||||
namespace nvbench::detail
|
||||
{
|
||||
|
||||
// Scope guard for the stream owned by a `Measure` object. On destruction it
// calls unblock_stream_noexcept() if a block is still armed and then
// sync_stream_noexcept(), unless release() disarmed it. The stream sync is
// armed from construction.
//
// `Measure` must provide block_stream, unblock_stream,
// unblock_stream_noexcept and sync_stream_noexcept.
template <typename Measure>
struct stream_cleanup_guard
{
  explicit stream_cleanup_guard(Measure &measure)
      : m_measure{measure}
      , m_sync_armed{true}
  {}

  stream_cleanup_guard(const stream_cleanup_guard &)            = delete;
  stream_cleanup_guard(stream_cleanup_guard &&)                 = delete;
  stream_cleanup_guard &operator=(const stream_cleanup_guard &) = delete;
  stream_cleanup_guard &operator=(stream_cleanup_guard &&)      = delete;

  ~stream_cleanup_guard() noexcept
  {
    if (m_unblock_armed)
    {
      m_measure.unblock_stream_noexcept();
    }
    if (m_sync_armed)
    {
      // The returned status is deliberately discarded.
      (void)m_measure.sync_stream_noexcept();
    }
  }

  // Queues the blocking kernel through the measure object. The unblock flag
  // is raised first: if block_stream throws after queueing work, the
  // destructor must still unblock the stream.
  void block_stream()
  {
    m_unblock_armed = true;
    m_measure.block_stream();
  }

  // Unblocks the stream through the throwing API if a block is armed;
  // otherwise does nothing. The flag is cleared only after a successful call.
  void unblock()
  {
    if (!m_unblock_armed)
    {
      return;
    }
    m_measure.unblock_stream();
    m_unblock_armed = false;
  }

  // Disarms the guard completely: the destructor will do nothing.
  void release() noexcept
  {
    m_sync_armed    = false;
    m_unblock_armed = false;
  }

private:
  Measure &m_measure;
  bool m_unblock_armed{false};
  bool m_sync_armed{false};
};
|
||||
|
||||
} // namespace nvbench::detail
|
||||
@@ -1,6 +1,7 @@
|
||||
set(test_srcs
|
||||
axes_metadata.cu
|
||||
benchmark.cu
|
||||
cleanup_guards.cu
|
||||
create.cu
|
||||
cuda_timer.cu
|
||||
cuda_stream.cu
|
||||
|
||||
253
testing/cleanup_guards.cu
Normal file
253
testing/cleanup_guards.cu
Normal file
@@ -0,0 +1,253 @@
|
||||
// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#include <nvbench/detail/measure_cold_launch_timer_core.cuh>
|
||||
#include <nvbench/detail/stream_cleanup_guard.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <initializer_list>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
// Identifies each call that fake_measure can record.
enum class action
{
  flush_device_l2,
  sync_stream,
  sync_stream_noexcept,
  cpu_timer_start,
  cpu_timer_stop,
  cpu_timer_stop_noexcept,
  block_stream,
  unblock_stream,
  unblock_stream_noexcept,
  gpu_frequency_start,
  gpu_frequency_stop,
  profiler_start,
  profiler_stop,
  profiler_stop_noexcept,
  cuda_timer_start,
  cuda_timer_stop,
};

// Stand-in for a measure object: every call is appended to a fixed-size log,
// and one chosen throwing call can be made to fail once with
// std::runtime_error.
struct fake_measure
{
  // Empties the log and clears the overflow marker. Any pending injected
  // failure is left untouched.
  void clear_actions() noexcept
  {
    overflow     = false;
    action_count = 0;
  }

  // Arms a one-shot failure for the next throwing call that records `a`.
  void throw_on(action a) noexcept
  {
    throw_action = a;
    should_throw = true;
  }

  // Appends `a` to the log; when the log is full, only sets `overflow`.
  void record(action a) noexcept
  {
    if (action_count >= actions.size())
    {
      overflow = true;
      return;
    }
    actions[action_count] = a;
    ++action_count;
  }

  // Records `a`, then throws if `a` is the armed failure. The failure is
  // disarmed before throwing, so it fires at most once per throw_on() call.
  void record_or_throw(action a)
  {
    this->record(a);

    const bool inject = should_throw && throw_action == a;
    if (!inject)
    {
      return;
    }
    should_throw = false;
    throw std::runtime_error{"Injected fake_measure failure."};
  }

  // --- Calls that may throw an injected failure -----------------------------
  void flush_device_l2() { this->record_or_throw(action::flush_device_l2); }
  void sync_stream() { this->record_or_throw(action::sync_stream); }
  void block_stream() { this->record_or_throw(action::block_stream); }
  void unblock_stream() { this->record_or_throw(action::unblock_stream); }
  void gpu_frequency_start() { this->record_or_throw(action::gpu_frequency_start); }
  void gpu_frequency_stop() { this->record_or_throw(action::gpu_frequency_stop); }
  void profiler_start() { this->record_or_throw(action::profiler_start); }
  void profiler_stop() { this->record_or_throw(action::profiler_stop); }
  void cuda_timer_start() { this->record_or_throw(action::cuda_timer_start); }
  void cuda_timer_stop() { this->record_or_throw(action::cuda_timer_stop); }

  // --- Calls that only record and never throw -------------------------------
  void cpu_timer_start() noexcept { this->record(action::cpu_timer_start); }
  void cpu_timer_stop() noexcept { this->record(action::cpu_timer_stop); }
  void cpu_timer_stop_noexcept() noexcept { this->record(action::cpu_timer_stop_noexcept); }
  void unblock_stream_noexcept() noexcept { this->record(action::unblock_stream_noexcept); }

  // Always reports status 0.
  int sync_stream_noexcept() noexcept
  {
    this->record(action::sync_stream_noexcept);
    return 0;
  }

  // Always reports status 0.
  int profiler_stop_noexcept() noexcept
  {
    this->record(action::profiler_stop_noexcept);
    return 0;
  }

  std::array<action, 32> actions{}; // recorded calls, valid in [0, action_count)
  std::size_t action_count{};       // number of valid entries in `actions`
  action throw_action{};            // call selected by throw_on()
  bool should_throw{false};         // true while an injected failure is pending
  bool overflow{false};             // set once a record() did not fit
};
|
||||
|
||||
template <typename Callable>
|
||||
void assert_throws(Callable &&callable)
|
||||
{
|
||||
bool threw = false;
|
||||
try
|
||||
{
|
||||
callable();
|
||||
}
|
||||
catch (const std::runtime_error &)
|
||||
{
|
||||
threw = true;
|
||||
}
|
||||
ASSERT(threw);
|
||||
}
|
||||
|
||||
void assert_actions(const fake_measure &measure, std::initializer_list<action> expected)
|
||||
{
|
||||
ASSERT(!measure.overflow);
|
||||
ASSERT(measure.action_count == expected.size());
|
||||
|
||||
std::size_t index = 0;
|
||||
for (const action expected_action : expected)
|
||||
{
|
||||
ASSERT(measure.actions[index] == expected_action);
|
||||
++index;
|
||||
}
|
||||
}
|
||||
|
||||
void test_stream_cleanup_guard_block_stream_throw()
|
||||
{
|
||||
fake_measure measure;
|
||||
measure.throw_on(action::block_stream);
|
||||
|
||||
assert_throws([&measure] {
|
||||
nvbench::detail::stream_cleanup_guard<fake_measure> cleanup{measure};
|
||||
cleanup.block_stream();
|
||||
});
|
||||
|
||||
assert_actions(
|
||||
measure,
|
||||
{action::block_stream, action::unblock_stream_noexcept, action::sync_stream_noexcept});
|
||||
}
|
||||
|
||||
void test_stream_cleanup_guard_unblock_then_throw()
|
||||
{
|
||||
fake_measure measure;
|
||||
|
||||
assert_throws([&measure] {
|
||||
nvbench::detail::stream_cleanup_guard<fake_measure> cleanup{measure};
|
||||
cleanup.block_stream();
|
||||
cleanup.unblock();
|
||||
throw std::runtime_error{"Injected post-unblock failure."};
|
||||
});
|
||||
|
||||
assert_actions(measure,
|
||||
{action::block_stream, action::unblock_stream, action::sync_stream_noexcept});
|
||||
}
|
||||
|
||||
void test_kernel_launch_timer_block_stream_throw()
|
||||
{
|
||||
fake_measure measure;
|
||||
measure.throw_on(action::block_stream);
|
||||
|
||||
assert_throws([&measure] {
|
||||
nvbench::detail::measure_cold_launch_timer_core<fake_measure> timer{
|
||||
measure,
|
||||
nvbench::detail::measure_cold_launch_timer_config{false, false, true}};
|
||||
timer.start();
|
||||
});
|
||||
|
||||
assert_actions(measure,
|
||||
{action::flush_device_l2,
|
||||
action::sync_stream,
|
||||
action::cpu_timer_start,
|
||||
action::block_stream,
|
||||
action::unblock_stream_noexcept,
|
||||
action::sync_stream_noexcept,
|
||||
action::cpu_timer_stop_noexcept});
|
||||
}
|
||||
|
||||
void test_kernel_launch_timer_gpu_frequency_start_throw()
|
||||
{
|
||||
fake_measure measure;
|
||||
measure.throw_on(action::gpu_frequency_start);
|
||||
|
||||
assert_throws([&measure] {
|
||||
nvbench::detail::measure_cold_launch_timer_core<fake_measure> timer{
|
||||
measure,
|
||||
nvbench::detail::measure_cold_launch_timer_config{false, false, true}};
|
||||
timer.start();
|
||||
});
|
||||
|
||||
assert_actions(measure,
|
||||
{action::flush_device_l2,
|
||||
action::sync_stream,
|
||||
action::cpu_timer_start,
|
||||
action::block_stream,
|
||||
action::gpu_frequency_start,
|
||||
action::unblock_stream_noexcept,
|
||||
action::sync_stream_noexcept,
|
||||
action::cpu_timer_stop_noexcept});
|
||||
}
|
||||
|
||||
void test_kernel_launch_timer_gpu_frequency_stop_throw()
|
||||
{
|
||||
fake_measure measure;
|
||||
nvbench::detail::measure_cold_launch_timer_core<fake_measure> timer{
|
||||
measure,
|
||||
nvbench::detail::measure_cold_launch_timer_config{false, false, true}};
|
||||
|
||||
timer.start();
|
||||
measure.clear_actions();
|
||||
measure.throw_on(action::gpu_frequency_stop);
|
||||
|
||||
assert_throws([&timer] { timer.stop(); });
|
||||
|
||||
assert_actions(measure,
|
||||
{action::cuda_timer_stop,
|
||||
action::gpu_frequency_stop,
|
||||
action::unblock_stream_noexcept,
|
||||
action::sync_stream_noexcept,
|
||||
action::cpu_timer_stop_noexcept});
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
test_stream_cleanup_guard_block_stream_throw();
|
||||
test_stream_cleanup_guard_unblock_then_throw();
|
||||
test_kernel_launch_timer_block_stream_throw();
|
||||
test_kernel_launch_timer_gpu_frequency_start_throw();
|
||||
test_kernel_launch_timer_gpu_frequency_stop_throw();
|
||||
|
||||
return 0;
|
||||
}
|
||||
catch (std::exception &e)
|
||||
{
|
||||
fmt::print("{}\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
Reference in New Issue
Block a user