From a2f88ff790cfe0c6c7c08a429c44eaed4298f792 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sat, 6 Apr 2024 11:03:42 -0400 Subject: [PATCH] Set `CUDA_MODULE_LOADING=EAGER` before `main`. (#157) * Set `CUDA_MODULE_LOADING=EAGER` before `main`. Fixes #136 * Portability for `setenv`. * Remove pre-main CUDART usage and set up env in main. * Fail examples if they deadlock. This is the best way we have to diagnose a regression for NVIDIA/nvbench#136. * Add an initialize method to benchmark_manager for CUDA-related setup. Benchmarks are created statically, so their constructors cannot call the CUDA APIs without breaking the CUDA_MODULE_LOADING setup. This method is called from `main` after the environment has been configured. --- examples/CMakeLists.txt | 6 ++++++ nvbench/benchmark_base.cuh | 2 -- nvbench/benchmark_manager.cuh | 9 +++++++++ nvbench/benchmark_manager.cxx | 10 ++++++++++ nvbench/main.cuh | 13 +++++++++++++ nvbench/option_parser.cu | 1 + 6 files changed, 39 insertions(+), 2 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 8a87267..b0f288c 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -26,6 +26,12 @@ foreach(example_src IN LISTS example_srcs) COMMAND "$" --timeout 0.1 --min-time 1e-5 ) + # These should not deadlock. If they do, it may be that the CUDA context was created before + # setting CUDA_MODULE_LOADING=EAGER in main, see NVIDIA/nvbench#136. 
+ set_tests_properties(${example_name} PROPERTIES + FAIL_REGULAR_EXPRESSION "Possible Deadlock Detected" + ) + add_dependencies(nvbench.example.all ${example_name}) endforeach() diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index 55673b0..170b942 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -53,7 +52,6 @@ struct benchmark_base template explicit benchmark_base(TypeAxes type_axes) : m_axes(type_axes) - , m_devices(nvbench::device_manager::get().get_devices()) {} virtual ~benchmark_base(); diff --git a/nvbench/benchmark_manager.cuh b/nvbench/benchmark_manager.cuh index 7316445..51fab18 100644 --- a/nvbench/benchmark_manager.cuh +++ b/nvbench/benchmark_manager.cuh @@ -38,6 +38,15 @@ struct benchmark_manager */ [[nodiscard]] static benchmark_manager &get(); + /** + * Set up any default values for the benchmarks. Invoked from `main`. + * + * Specifically, any CUDA calls (e.g. cudaGetDeviceProperties) needed to initialize the + * benchmarks should be done here to avoid creating a CUDA context before we configure the CUDA + * environment in `main`. + */ + void initialize(); + /** * Register a new benchmark. 
*/ diff --git a/nvbench/benchmark_manager.cxx b/nvbench/benchmark_manager.cxx index cd84f61..5df702d 100644 --- a/nvbench/benchmark_manager.cxx +++ b/nvbench/benchmark_manager.cxx @@ -18,6 +18,7 @@ #include +#include #include #include @@ -34,6 +35,15 @@ benchmark_manager &benchmark_manager::get() return the_manager; } +void benchmark_manager::initialize() +{ + const auto& mgr = device_manager::get(); + for (auto& bench : m_benchmarks) + { + bench->set_devices(mgr.get_devices()); + } +} + benchmark_base &benchmark_manager::add(std::unique_ptr bench) { m_benchmarks.push_back(std::move(bench)); diff --git a/nvbench/main.cuh b/nvbench/main.cuh index 0ba82d7..bcdced5 100644 --- a/nvbench/main.cuh +++ b/nvbench/main.cuh @@ -25,6 +25,7 @@ #include #include +#include #include #define NVBENCH_MAIN \ @@ -58,10 +59,22 @@ nvbench::option_parser parser; \ parser.parse(argc, argv) +// See NVIDIA/NVBench#136 for CUDA_MODULE_LOADING +#ifdef _MSC_VER +#define NVBENCH_INITIALIZE_CUDA_ENV _putenv_s("CUDA_MODULE_LOADING", "EAGER") +#else +#define NVBENCH_INITIALIZE_CUDA_ENV setenv("CUDA_MODULE_LOADING", "EAGER", 1) +#endif + +#define NVBENCH_INITIALIZE_BENCHMARKS() \ + nvbench::benchmark_manager::get().initialize() + #define NVBENCH_MAIN_BODY(argc, argv) \ do \ { \ + NVBENCH_INITIALIZE_CUDA_ENV; \ NVBENCH_INITIALIZE_DRIVER_API; \ + NVBENCH_INITIALIZE_BENCHMARKS(); \ NVBENCH_MAIN_PARSE(argc, argv); \ auto &printer = parser.get_printer(); \ \ diff --git a/nvbench/option_parser.cu b/nvbench/option_parser.cu index 6dba745..1edac87 100644 --- a/nvbench/option_parser.cu +++ b/nvbench/option_parser.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include