Add multi-cuda wheel build (#289)

Co-authored-by: Ashwin Srinath <shwina@users.noreply.github.com> Co-authored-by: Nader Al Awar <naderalawar@gmail.com>
2026-04-20 06:48:53 +00:00 · 2026-01-28 10:37:55 -05:00
parent f3fa93f388
commit a681e2185d
13 changed files with 379 additions and 288 deletions
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -25,19 +25,23 @@ CPMAddPackage(

 CPMAddPackage("gh:pybind/pybind11@3.0.1")

+# Determine CUDA major version for directory structure.
+# The Python package imports cuda.bench.cu<major>._nvbench at runtime, so an
+# empty major version would install the extension into an unusable "cu"
+# directory. Fail at configure time instead.
+if("${CUDAToolkit_VERSION_MAJOR}" STREQUAL "")
+  message(FATAL_ERROR "CUDAToolkit_VERSION_MAJOR is not set; find_package(CUDAToolkit) must succeed before the extension is configured")
+endif()
+set(CUDA_VERSION_DIR "cu${CUDAToolkit_VERSION_MAJOR}")
+message(STATUS "Building extension for CUDA ${CUDAToolkit_VERSION_MAJOR}, output directory: cuda/bench/${CUDA_VERSION_DIR}")
+
 add_library(_nvbench MODULE src/py_nvbench.cpp)
 target_include_directories(_nvbench PRIVATE ${Python_INCLUDE_DIRS})
 target_link_libraries(_nvbench PUBLIC nvbench::nvbench)
-target_link_libraries(_nvbench PRIVATE CUDA::cudart_static pybind11::headers)
+target_link_libraries(_nvbench PRIVATE CUDA::cudart_static CUDA::cuda_driver pybind11::headers)

+# $ORIGIN makes the module search its own directory for shared libraries;
+# the nvbench runtime artifacts are installed into that same directory below.
 set_target_properties(_nvbench PROPERTIES INSTALL_RPATH "$ORIGIN")
 set_target_properties(_nvbench PROPERTIES INTERPROCEDURAL_OPTIMIZATION ON)
 set_target_properties(_nvbench PROPERTIES POSITION_INDEPENDENT_CODE ON)
 set_target_properties(_nvbench PROPERTIES PREFIX "" SUFFIX "${PYTHON_MODULE_EXTENSION}")

+# Install into the per-CUDA-major subdirectory (cuda/bench/cu<major>).
-install(TARGETS _nvbench DESTINATION cuda/bench)
+install(TARGETS _nvbench DESTINATION cuda/bench/${CUDA_VERSION_DIR})

 # Determine target that nvbench::nvbench is an alias of,
 # necessary because ALIAS targets cannot be installed
 get_target_property(_aliased_target_name nvbench::nvbench ALIASED_TARGET)
-install(IMPORTED_RUNTIME_ARTIFACTS ${_aliased_target_name} DESTINATION cuda/bench)
+install(IMPORTED_RUNTIME_ARTIFACTS ${_aliased_target_name} DESTINATION cuda/bench/${CUDA_VERSION_DIR})
--- a/python/cuda/bench/init.py
+++ b/python/cuda/bench/init.py
@@ -14,6 +14,7 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.

+import importlib
 import importlib.metadata
 import warnings

@@ -31,29 +32,63 @@ except Exception as e:
        f"Version is set to fall-back value '{__version__}' instead."
    )

+
+# Detect CUDA runtime version and load appropriate extension
+def _get_cuda_major_version():
+    """Return the CUDA major version implied by the installed cuda-bindings.
+
+    The value is the leading component of ``cuda.bindings.__version__``
+    (e.g. ``12`` for ``"12.9.1"``); no CUDA runtime or driver is queried.
+
+    Raises:
+        ImportError: If cuda-bindings is not installed, or if its version
+            string does not start with an integer component.
+    """
+    try:
+        import cuda.bindings
+    except ImportError as e:
+        raise ImportError(
+            "cuda-bindings is required for runtime CUDA version detection. "
+            "Install with: pip install pynvbench[cu12] or pip install pynvbench[cu13]"
+        ) from e
+
+    # cuda-bindings version is in format like "12.9.1" or "13.0.0"
+    version_str = cuda.bindings.__version__
+    try:
+        return int(version_str.split(".")[0])
+    except ValueError as e:
+        # Surface an unparsable version as an import failure of this package
+        # rather than a bare ValueError raised while importing it.
+        raise ImportError(
+            f"Cannot determine the CUDA major version from cuda-bindings "
+            f"version {version_str!r}."
+        ) from e
+
+
+_cuda_major = _get_cuda_major_version()
+_extra_name = f"cu{_cuda_major}"
+_module_fullname = f"cuda.bench.{_extra_name}._nvbench"
+
+# Load required NVIDIA libraries before importing the extension module, so
+# they are already in the process when the extension is loaded (presumably
+# the extension links against them -- TODO confirm).
 for libname in ("cupti", "nvperf_target", "nvperf_host"):
    load_nvidia_dynamic_lib(libname)
+
+# Import the extension built for the detected CUDA major version.
+try:
+    _nvbench_module = importlib.import_module(_module_fullname)
+except ImportError as e:
+    # Chain the original error so the underlying loader failure stays visible.
+    raise ImportError(
+        f"No pynvbench extension found for CUDA {_cuda_major}.x. "
+        f"This wheel may not include support for your CUDA version. "
+        f"Supported CUDA versions: 12, 13. "
+        f"Original error: {e}"
+    ) from e

-from cuda.bench._nvbench import (  # noqa: E402
-    Benchmark as Benchmark,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    CudaStream as CudaStream,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    Launch as Launch,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    NVBenchRuntimeError as NVBenchRuntimeError,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    State as State,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    register as register,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    run_all_benchmarks as run_all_benchmarks,
-)
+# Import and expose all public symbols from the CUDA-specific extension.
+# The names are bound by attribute access because the extension module is
+# only known at import time (it is selected via importlib above).
+Benchmark = _nvbench_module.Benchmark
+CudaStream = _nvbench_module.CudaStream
+Launch = _nvbench_module.Launch
+NVBenchRuntimeError = _nvbench_module.NVBenchRuntimeError
+State = _nvbench_module.State
+register = _nvbench_module.register
+run_all_benchmarks = _nvbench_module.run_all_benchmarks
+# NOTE(review): the two test_* helpers were not part of the previous import
+# list; presumably they are exposed for the test suite -- confirm they belong
+# in the package namespace.
+test_cpp_exception = _nvbench_module.test_cpp_exception
+test_py_exception = _nvbench_module.test_py_exception

-del load_nvidia_dynamic_lib
+# Expose the module as _nvbench for backward compatibility (e.g., for tests)
+_nvbench = _nvbench_module
+
+# Clean up internal symbols. Deleting _nvbench_module only unbinds that name;
+# the extension stays reachable through the _nvbench alias above. libname is
+# the loop variable left behind by the library-loading loop.
+del (
+    load_nvidia_dynamic_lib,
+    libname,
+    _nvbench_module,
+    _cuda_major,
+    _extra_name,
+    _module_fullname,
+    _get_cuda_major_version,
+)
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -22,22 +22,26 @@ classifiers = [
 ]
 requires-python = ">=3.10"
 dependencies = [
-  # pathfinder
+  # pathfinder for finding CUDA libraries
  "cuda-pathfinder",
-
-  # Library expects to find shared libraries
-  # libcupti, libnvperf_target, libnvperf_host
-  # pathfinder is used to find it in the Python layout
-  "nvidia-cuda-cupti-cu12",
-
-  # The shared library
-  # libnvidia-ml must be installed system-wide
-  # (Debian package provider: libnvidia-compute)
+  # cuda-bindings and the CUPTI wheel are CUDA-version specific and are
+  # pulled in through the cu12 / cu13 extras in
+  # [project.optional-dependencies] rather than listed here.
 ]
 dynamic = ["version"]
 readme = { file = "README.md", content-type = "text/markdown" }

 [project.optional-dependencies]
+# CUDA 12.x dependencies
+cu12 = ["cuda-bindings>=12.0.0,<13.0.0", "nvidia-cuda-cupti-cu12"]
+
+# CUDA 13.x dependencies
+cu13 = ["cuda-bindings>=13.0.0,<14.0.0", "nvidia-cuda-cupti>=13.0"]
+
+# Test dependencies for CUDA 12
+test-cu12 = ["pynvbench[cu12]", "pytest", "cupy-cuda12x", "numba"]
+
+# Test dependencies for CUDA 13
+test-cu13 = ["pynvbench[cu13]", "pytest", "cupy-cuda13x", "numba"]
+
+# Generic test dependencies (defaults to CUDA 12).
+# Includes the cu12 extra because the package cannot be imported without
+# cuda-bindings, which is no longer a base dependency.
-test = ["pytest", "cupy-cuda12x", "numba"]
+test = ["pynvbench[cu12]", "pytest", "cupy-cuda12x", "numba"]

 [project.urls]
--- a/python/src/py_nvbench.cpp
+++ b/python/src/py_nvbench.cpp
@@ -272,7 +272,11 @@ std::unique_ptr<GlobalBenchmarkRegistry, py::nodelete> global_registry{};
 // If you modify these bindings, please be sure to update the
 // corresponding type hints in ``../cuda/nvbench/__init__.pyi``

-PYBIND11_MODULE(_nvbench, m)
+// The module name can be overridden at compile time by defining
+// PYBIND11_MODULE_NAME; it falls back to _nvbench when left undefined.
+#ifndef PYBIND11_MODULE_NAME
+#define PYBIND11_MODULE_NAME _nvbench
+#endif
+
+PYBIND11_MODULE(PYBIND11_MODULE_NAME, m)
 {
  // == STEP 1
  // Set environment variable CUDA_MODULE_LOADING=EAGER