Set CUDA_MODULE_LOADING=EAGER before main. (#157)

* Set `CUDA_MODULE_LOADING=EAGER` before `main`.

Fixes #136

* Portability for `setenv`.

* Remove pre-main CUDART usage and set up the environment in main.

* Fail examples if they deadlock.

This is the best way we have to diagnose a regression for
NVIDIA/nvbench#136.

* Add an initialize method to benchmark_manager for CUDA-related setup.

Benchmarks are created statically, so their constructors cannot call the CUDA APIs without breaking the CUDA_MODULE_LOADING setup.

This method is called from `main` after the environment has been configured.
This commit is contained in:
Allison Piper
2024-04-06 11:03:42 -04:00
committed by GitHub
parent e8c8877d36
commit a2f88ff790
6 changed files with 39 additions and 2 deletions

View File

@@ -26,6 +26,12 @@ foreach(example_src IN LISTS example_srcs)
COMMAND "$<TARGET_FILE:${example_name}>" --timeout 0.1 --min-time 1e-5
)
# These should not deadlock. If they do, it may be that the CUDA context was created before
# setting CUDA_MODULE_LOADING=EAGER in main, see NVIDIA/nvbench#136.
set_tests_properties(${example_name} PROPERTIES
FAIL_REGULAR_EXPRESSION "Possible Deadlock Detected"
)
add_dependencies(nvbench.example.all ${example_name})
endforeach()

View File

@@ -20,7 +20,6 @@
#include <nvbench/axes_metadata.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/device_manager.cuh>
#include <nvbench/state.cuh>
#include <nvbench/stopping_criterion.cuh>
@@ -53,7 +52,6 @@ struct benchmark_base
template <typename TypeAxes>
explicit benchmark_base(TypeAxes type_axes)
: m_axes(type_axes)
, m_devices(nvbench::device_manager::get().get_devices())
{}
virtual ~benchmark_base();

View File

@@ -38,6 +38,15 @@ struct benchmark_manager
*/
[[nodiscard]] static benchmark_manager &get();
/**
* Set up any default values for the benchmarks. Invoked from `main`.
*
* Specifically, any CUDA calls (e.g. cudaGetDeviceProperties, etc) needed to initialize the
* benchmarks should be done here to avoid creating a CUDA context before we configure the CUDA
* environment in `main`.
*/
void initialize();
/**
* Register a new benchmark.
*/

View File

@@ -18,6 +18,7 @@
#include <nvbench/benchmark_manager.cuh>
#include <nvbench/device_manager.cuh>
#include <nvbench/detail/throw.cuh>
#include <fmt/format.h>
@@ -34,6 +35,15 @@ benchmark_manager &benchmark_manager::get()
return the_manager;
}
void benchmark_manager::initialize()
{
  // Obtain the device manager once, before walking the registered benchmarks.
  // This function is meant to be invoked from `main`, after the CUDA
  // environment has been configured, so that benchmark constructors (which
  // run during static initialization) never have to touch the CUDA APIs.
  const auto &devices_source = device_manager::get();

  // Hand every registered benchmark the device list reported by the manager.
  for (auto it = m_benchmarks.begin(); it != m_benchmarks.end(); ++it)
  {
    (*it)->set_devices(devices_source.get_devices());
  }
}
benchmark_base &benchmark_manager::add(std::unique_ptr<benchmark_base> bench)
{
m_benchmarks.push_back(std::move(bench));

View File

@@ -25,6 +25,7 @@
#include <nvbench/option_parser.cuh>
#include <nvbench/printer_base.cuh>
#include <cstdlib>
#include <iostream>
#define NVBENCH_MAIN \
@@ -58,10 +59,22 @@
nvbench::option_parser parser; \
parser.parse(argc, argv)
// NVBENCH_INITIALIZE_CUDA_ENV: sets CUDA_MODULE_LOADING=EAGER in the process
// environment. See NVIDIA/NVBench#136 for CUDA_MODULE_LOADING.
//
// Expands to a single expression without a trailing semicolon; the return
// value of the underlying call is not inspected by the macro itself.
#ifdef _MSC_VER
// MSVC's C runtime does not provide setenv; _putenv_s is the equivalent there.
#define NVBENCH_INITIALIZE_CUDA_ENV _putenv_s("CUDA_MODULE_LOADING", "EAGER")
#else
// The final argument (1) asks setenv to overwrite any value already present.
#define NVBENCH_INITIALIZE_CUDA_ENV setenv("CUDA_MODULE_LOADING", "EAGER", 1)
#endif
// NVBENCH_INITIALIZE_BENCHMARKS(): calls initialize() on the manager returned
// by nvbench::benchmark_manager::get(). NVBENCH_MAIN_BODY invokes this after
// NVBENCH_INITIALIZE_CUDA_ENV and NVBENCH_INITIALIZE_DRIVER_API.
#define NVBENCH_INITIALIZE_BENCHMARKS() \
nvbench::benchmark_manager::get().initialize()
#define NVBENCH_MAIN_BODY(argc, argv) \
do \
{ \
NVBENCH_INITIALIZE_CUDA_ENV; \
NVBENCH_INITIALIZE_DRIVER_API; \
NVBENCH_INITIALIZE_BENCHMARKS(); \
NVBENCH_MAIN_PARSE(argc, argv); \
auto &printer = parser.get_printer(); \
\

View File

@@ -22,6 +22,7 @@
#include <nvbench/benchmark_manager.cuh>
#include <nvbench/csv_printer.cuh>
#include <nvbench/criterion_manager.cuh>
#include <nvbench/device_manager.cuh>
#include <nvbench/git_revision.cuh>
#include <nvbench/json_printer.cuh>
#include <nvbench/markdown_printer.cuh>