mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-06-29 18:57:44 +00:00
Improve exception safety of timer structs by using local scope guards to ensure that cleanup steps, such as signaling the blocking kernel to unblock and making sure that the stream is synchronized, are performed even if the launch object throws an exception. Tests of exception safety were added. -- * blocking_kernel.unblock_noexcept() noexcept method added This decouples the logic of signaling to unblock from checking of the timeout. * Improve exception safety in kernel_launch_timer Introduce noexcept cleanup methods. Place body of start() and stop() methods in the try/catch block and execute noexcept clean-up on exception before rethrowing. * Improve exception safety of measure_hot * Make sure that throwing methods call noexcept ones instead of duplicating functionality * Use cleanup_guard in measure_cold_base::kernel_launch_timer Replace try/catch pattern with cleaner use of cleanup_guard class. * cpu_timer::start, cpu_timer::stop methods marked noexcept These methods do not throw, and marking them noexcept explicitly makes it fine to call them from other noexcept methods, such as cleanup_noexcept in measure_cold. * Address remaining exception safety issue in measure_hot * Renamed guard variables to reflect their purpose, apply arm-then-do to ops queueing kernels Set m_block_stream_armed = true; before launching the kernel. Doing so signals the cleanup guard that the stream must be unblocked, even if launching of the kernel failed. Same for the operation launching the time-stamp kernel. * Add testing/device/exception_safety.cu This test adds a benchmark that throws. It verifies that it did not time out and that the control counters the benchmark maintains are at the expected values. * Refactor measurement cleanup guards for testability Extract hot stream cleanup and cold launch timer cleanup into reusable detail helpers. Keep measure_hot and measure_cold using those helpers through thin adapters so the tested cleanup logic matches the production path.
Add driver-free cleanup guard tests using a fake measure object to verify cleanup ordering when exceptions occur after blocking stream setup, after hot unblock, and around cold GPU frequency start/stop paths. * Implement cpu_timer_stop_noexcept in terms of cpu_timer_stop The cpu_timer_stop is already noexcept by nature of implementation, but we maintain cpu_timer_stop_noexcept method for symmetry with other pairs sync_stream()/sync_stream_noexcept(). The cpu_timer_stop_noexcept() is implemented via cpu_timer_stop(). These methods are annotated __forceinline__, so the same code should be generated. * More readable initialization of bool members * Moved exception_safety.cu back to testing/ folder testing/device is reserved for tests that require locking of GPU frequency per CMake option description. * Fixed nitpick and bug it discovered Changed testing/exception_safety.cu:237 so run_benchmark now iterates over every state from bench.get_states() and checks each one is skipped with a reason containing "requested". That exposed a real runner behavior gap, so I also made a minimal fix in nvbench/runner.cuh:120: after stop_runner_loop, remaining states are now explicitly marked skipped with a reason instead of only printing a skip notification. * Move static assertions (pertaining to cleanup guards) to testing/cleanup_guards.cu The CI failure with CTK 12.0 and certain version of GCC is caused by OOM in cudafe++ process tripped by compiling instantiation of contract verification on cold_launch_timer_probe struct. As a work-around, this instantiation is excluded for CTK 12.0-12.6
357 lines
11 KiB
Plaintext
357 lines
11 KiB
Plaintext
// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
#include <nvbench/blocking_kernel.cuh>
|
|
#include <nvbench/cpu_timer.cuh>
|
|
#include <nvbench/detail/measure_cold.cuh>
|
|
#include <nvbench/detail/measure_cold_launch_timer_core.cuh>
|
|
#include <nvbench/detail/measure_hot.cuh>
|
|
#include <nvbench/detail/stream_cleanup_guard.cuh>
|
|
|
|
#include <cuda_runtime_api.h>
|
|
|
|
#include <fmt/format.h>
|
|
|
|
#include <array>
|
|
#include <cstddef>
|
|
#include <initializer_list>
|
|
#include <stdexcept>
|
|
#include <type_traits>
|
|
#include <utility>
|
|
|
|
#include "test_asserts.cuh"
|
|
|
|
namespace
|
|
{
|
|
|
|
struct hot_cleanup_probe : nvbench::detail::measure_hot_base
|
|
{
|
|
using nvbench::detail::measure_hot_base::sync_stream_noexcept;
|
|
using nvbench::detail::measure_hot_base::unblock_stream_noexcept;
|
|
};
|
|
|
|
struct cold_cleanup_probe : nvbench::detail::measure_cold_base
|
|
{
|
|
using kernel_launch_timer = nvbench::detail::measure_cold_base::kernel_launch_timer;
|
|
|
|
using nvbench::detail::measure_cold_base::profiler_stop_noexcept;
|
|
using nvbench::detail::measure_cold_base::sync_stream_noexcept;
|
|
using nvbench::detail::measure_cold_base::unblock_stream_noexcept;
|
|
};
|
|
|
|
using cold_launch_timer_probe = cold_cleanup_probe::kernel_launch_timer;
|
|
using cold_launch_timer_core_probe =
|
|
nvbench::detail::measure_cold_launch_timer_core<cold_launch_timer_probe>;
|
|
|
|
// Compile-time check that Timer::start() and Timer::stop() are noexcept when
// invoked on a non-const lvalue. The function has no runtime effect; a
// violated contract is reported as a static_assert failure when the template
// is instantiated.
//
// Template parameter:
//   Timer - type providing start() and stop() member functions.
template <typename Timer>
constexpr void verify_cpu_timer_noexcept_contract()
{
  static_assert(noexcept(std::declval<Timer &>().start()),
                "CPU timer start must remain noexcept for cleanup-safe measurement code.");
  static_assert(noexcept(std::declval<Timer &>().stop()),
                "CPU timer stop must remain noexcept for cleanup-safe measurement code.");
}
|
|
|
|
// Compile-time check that Measure::sync_stream_noexcept() and
// Measure::unblock_stream_noexcept() are noexcept when invoked on a non-const
// lvalue. No runtime effect; failures surface as static_assert errors at
// instantiation.
//
// Template parameter:
//   Measure - type providing both cleanup member functions.
template <typename Measure>
constexpr void verify_stream_cleanup_measure_noexcept_contract()
{
  static_assert(noexcept(std::declval<Measure &>().sync_stream_noexcept()),
                "Cleanup measure sync_stream_noexcept must remain noexcept.");
  static_assert(noexcept(std::declval<Measure &>().unblock_stream_noexcept()),
                "Cleanup measure unblock_stream_noexcept must remain noexcept.");
}
|
|
|
|
template <typename Measure>
|
|
constexpr void verify_cold_measure_noexcept_contract()
|
|
{
|
|
verify_stream_cleanup_measure_noexcept_contract<Measure>();
|
|
static_assert(noexcept(std::declval<Measure &>().profiler_stop_noexcept()),
|
|
"Cold cleanup measure profiler_stop_noexcept must remain noexcept.");
|
|
}
|
|
|
|
// Compile-time check of the cleanup surface of a cold kernel launch timer:
// each listed member call must be noexcept on a non-const lvalue, and the
// type must be nothrow-destructible. No runtime effect; failures surface as
// static_assert errors at instantiation.
//
// Template parameter:
//   Timer - type providing cpu_timer_start, cpu_timer_stop,
//           cpu_timer_stop_noexcept, sync_stream_noexcept,
//           profiler_stop_noexcept and unblock_stream_noexcept.
template <typename Timer>
constexpr void verify_cold_launch_timer_noexcept_contract()
{
  static_assert(noexcept(std::declval<Timer &>().cpu_timer_start()),
                "Cold kernel_launch_timer cpu_timer_start must remain noexcept.");
  static_assert(noexcept(std::declval<Timer &>().cpu_timer_stop()),
                "Cold kernel_launch_timer cpu_timer_stop must remain noexcept.");
  static_assert(noexcept(std::declval<Timer &>().cpu_timer_stop_noexcept()),
                "Cold kernel_launch_timer cpu_timer_stop_noexcept must remain noexcept.");
  static_assert(noexcept(std::declval<Timer &>().sync_stream_noexcept()),
                "Cold kernel_launch_timer sync_stream_noexcept must remain noexcept.");
  static_assert(noexcept(std::declval<Timer &>().profiler_stop_noexcept()),
                "Cold kernel_launch_timer profiler_stop_noexcept must remain noexcept.");
  static_assert(noexcept(std::declval<Timer &>().unblock_stream_noexcept()),
                "Cold kernel_launch_timer unblock_stream_noexcept must remain noexcept.");
  static_assert(std::is_nothrow_destructible_v<Timer>,
                "Cold kernel_launch_timer destructor must remain noexcept.");
}
|
|
|
|
// Compile-time check that a cleanup guard type is nothrow-destructible and
// that its release() member is noexcept on a non-const lvalue. No runtime
// effect; failures surface as static_assert errors at instantiation.
//
// Template parameter:
//   Guard - guard type providing a release() member function.
template <typename Guard>
constexpr void verify_stream_cleanup_guard_noexcept_contract()
{
  static_assert(std::is_nothrow_destructible_v<Guard>,
                "stream_cleanup_guard destructor must remain noexcept.");
  static_assert(noexcept(std::declval<Guard &>().release()),
                "stream_cleanup_guard release must remain noexcept.");
}
|
|
|
|
constexpr void verify_noexcept_contracts()
|
|
{
|
|
verify_cpu_timer_noexcept_contract<nvbench::cpu_timer>();
|
|
static_assert(noexcept(std::declval<nvbench::blocking_kernel &>().unblock_noexcept()),
|
|
"blocking_kernel unblock_noexcept must remain noexcept.");
|
|
|
|
verify_stream_cleanup_measure_noexcept_contract<hot_cleanup_probe>();
|
|
verify_cold_measure_noexcept_contract<cold_cleanup_probe>();
|
|
#if defined(CUDART_VERSION) && CUDART_VERSION > 12600
|
|
// CUDA 12.0 through 12.6 can exhaust host memory in cudafe++ while checking
|
|
// this contract.
|
|
verify_cold_launch_timer_noexcept_contract<cold_launch_timer_probe>();
|
|
#endif
|
|
verify_stream_cleanup_guard_noexcept_contract<
|
|
nvbench::detail::stream_cleanup_guard<hot_cleanup_probe>>();
|
|
|
|
static_assert(std::is_nothrow_destructible_v<cold_launch_timer_core_probe>,
|
|
"measure_cold_launch_timer_core destructor must remain noexcept.");
|
|
}
|
|
|
|
static_assert((verify_noexcept_contracts(), true), "Noexcept cleanup contracts must hold.");
|
|
|
|
// Identifies each operation that fake_measure can record. One enumerator
// exists per fake_measure member function that logs a call; the tests compare
// recorded sequences of these values against expected sequences.
enum class action
{
  flush_device_l2,
  sync_stream,
  sync_stream_noexcept,
  cpu_timer_start,
  cpu_timer_stop,
  cpu_timer_stop_noexcept,
  block_stream,
  unblock_stream,
  unblock_stream_noexcept,
  gpu_frequency_start,
  gpu_frequency_stop,
  profiler_start,
  profiler_stop,
  profiler_stop_noexcept,
  cuda_timer_start,
  cuda_timer_stop,
};
|
|
|
|
struct fake_measure
|
|
{
|
|
void clear_actions() noexcept
|
|
{
|
|
action_count = 0;
|
|
overflow = false;
|
|
}
|
|
|
|
void throw_on(action a) noexcept
|
|
{
|
|
should_throw = true;
|
|
throw_action = a;
|
|
}
|
|
|
|
void record(action a) noexcept
|
|
{
|
|
if (action_count < actions.size())
|
|
{
|
|
actions[action_count++] = a;
|
|
}
|
|
else
|
|
{
|
|
overflow = true;
|
|
}
|
|
}
|
|
|
|
void record_or_throw(action a)
|
|
{
|
|
this->record(a);
|
|
if (should_throw && throw_action == a)
|
|
{
|
|
should_throw = false;
|
|
throw std::runtime_error{"Injected fake_measure failure."};
|
|
}
|
|
}
|
|
|
|
void flush_device_l2() { this->record_or_throw(action::flush_device_l2); }
|
|
void sync_stream() { this->record_or_throw(action::sync_stream); }
|
|
int sync_stream_noexcept() noexcept
|
|
{
|
|
this->record(action::sync_stream_noexcept);
|
|
return 0;
|
|
}
|
|
|
|
void cpu_timer_start() noexcept { this->record(action::cpu_timer_start); }
|
|
void cpu_timer_stop() noexcept { this->record(action::cpu_timer_stop); }
|
|
void cpu_timer_stop_noexcept() noexcept { this->record(action::cpu_timer_stop_noexcept); }
|
|
|
|
void block_stream() { this->record_or_throw(action::block_stream); }
|
|
void unblock_stream() { this->record_or_throw(action::unblock_stream); }
|
|
void unblock_stream_noexcept() noexcept { this->record(action::unblock_stream_noexcept); }
|
|
|
|
void gpu_frequency_start() { this->record_or_throw(action::gpu_frequency_start); }
|
|
void gpu_frequency_stop() { this->record_or_throw(action::gpu_frequency_stop); }
|
|
|
|
void profiler_start() { this->record_or_throw(action::profiler_start); }
|
|
void profiler_stop() { this->record_or_throw(action::profiler_stop); }
|
|
int profiler_stop_noexcept() noexcept
|
|
{
|
|
this->record(action::profiler_stop_noexcept);
|
|
return 0;
|
|
}
|
|
|
|
void cuda_timer_start() { this->record_or_throw(action::cuda_timer_start); }
|
|
void cuda_timer_stop() { this->record_or_throw(action::cuda_timer_stop); }
|
|
|
|
std::array<action, 32> actions{};
|
|
std::size_t action_count{};
|
|
action throw_action{};
|
|
bool should_throw{false};
|
|
bool overflow{false};
|
|
};
|
|
|
|
template <typename Callable>
|
|
void assert_throws(Callable &&callable)
|
|
{
|
|
bool threw = false;
|
|
try
|
|
{
|
|
callable();
|
|
}
|
|
catch (const std::runtime_error &)
|
|
{
|
|
threw = true;
|
|
}
|
|
ASSERT(threw);
|
|
}
|
|
|
|
void assert_actions(const fake_measure &measure, std::initializer_list<action> expected)
|
|
{
|
|
ASSERT(!measure.overflow);
|
|
ASSERT(measure.action_count == expected.size());
|
|
|
|
std::size_t index = 0;
|
|
for (const action expected_action : expected)
|
|
{
|
|
ASSERT(measure.actions[index] == expected_action);
|
|
++index;
|
|
}
|
|
}
|
|
|
|
void test_stream_cleanup_guard_block_stream_throw()
|
|
{
|
|
fake_measure measure;
|
|
measure.throw_on(action::block_stream);
|
|
|
|
assert_throws([&measure] {
|
|
nvbench::detail::stream_cleanup_guard<fake_measure> cleanup{measure};
|
|
cleanup.block_stream();
|
|
});
|
|
|
|
assert_actions(
|
|
measure,
|
|
{action::block_stream, action::unblock_stream_noexcept, action::sync_stream_noexcept});
|
|
}
|
|
|
|
void test_stream_cleanup_guard_unblock_then_throw()
|
|
{
|
|
fake_measure measure;
|
|
|
|
assert_throws([&measure] {
|
|
nvbench::detail::stream_cleanup_guard<fake_measure> cleanup{measure};
|
|
cleanup.block_stream();
|
|
cleanup.unblock();
|
|
throw std::runtime_error{"Injected post-unblock failure."};
|
|
});
|
|
|
|
assert_actions(measure,
|
|
{action::block_stream, action::unblock_stream, action::sync_stream_noexcept});
|
|
}
|
|
|
|
void test_kernel_launch_timer_block_stream_throw()
|
|
{
|
|
fake_measure measure;
|
|
measure.throw_on(action::block_stream);
|
|
|
|
assert_throws([&measure] {
|
|
nvbench::detail::measure_cold_launch_timer_core<fake_measure> timer{
|
|
measure,
|
|
nvbench::detail::measure_cold_launch_timer_config{false, false, true}};
|
|
timer.start();
|
|
});
|
|
|
|
assert_actions(measure,
|
|
{action::flush_device_l2,
|
|
action::sync_stream,
|
|
action::cpu_timer_start,
|
|
action::block_stream,
|
|
action::unblock_stream_noexcept,
|
|
action::sync_stream_noexcept,
|
|
action::cpu_timer_stop_noexcept});
|
|
}
|
|
|
|
void test_kernel_launch_timer_gpu_frequency_start_throw()
|
|
{
|
|
fake_measure measure;
|
|
measure.throw_on(action::gpu_frequency_start);
|
|
|
|
assert_throws([&measure] {
|
|
nvbench::detail::measure_cold_launch_timer_core<fake_measure> timer{
|
|
measure,
|
|
nvbench::detail::measure_cold_launch_timer_config{false, false, true}};
|
|
timer.start();
|
|
});
|
|
|
|
assert_actions(measure,
|
|
{action::flush_device_l2,
|
|
action::sync_stream,
|
|
action::cpu_timer_start,
|
|
action::block_stream,
|
|
action::gpu_frequency_start,
|
|
action::unblock_stream_noexcept,
|
|
action::sync_stream_noexcept,
|
|
action::cpu_timer_stop_noexcept});
|
|
}
|
|
|
|
void test_kernel_launch_timer_gpu_frequency_stop_throw()
|
|
{
|
|
fake_measure measure;
|
|
nvbench::detail::measure_cold_launch_timer_core<fake_measure> timer{
|
|
measure,
|
|
nvbench::detail::measure_cold_launch_timer_config{false, false, true}};
|
|
|
|
timer.start();
|
|
measure.clear_actions();
|
|
measure.throw_on(action::gpu_frequency_stop);
|
|
|
|
assert_throws([&timer] { timer.stop(); });
|
|
|
|
assert_actions(measure,
|
|
{action::cuda_timer_stop,
|
|
action::gpu_frequency_stop,
|
|
action::unblock_stream_noexcept,
|
|
action::sync_stream_noexcept,
|
|
action::cpu_timer_stop_noexcept});
|
|
}
|
|
|
|
} // namespace
|
|
|
|
int main()
|
|
try
|
|
{
|
|
test_stream_cleanup_guard_block_stream_throw();
|
|
test_stream_cleanup_guard_unblock_then_throw();
|
|
test_kernel_launch_timer_block_stream_throw();
|
|
test_kernel_launch_timer_gpu_frequency_start_throw();
|
|
test_kernel_launch_timer_gpu_frequency_stop_throw();
|
|
|
|
return 0;
|
|
}
|
|
catch (std::exception &e)
|
|
{
|
|
fmt::print("{}\n", e.what());
|
|
return 1;
|
|
}
|