From 6df5fc8c677efced31113b134ff0b35fc1430c7a Mon Sep 17 00:00:00 2001
From: Nader Al Awar <naderalawar@gmail.com>
Date: Mon, 2 Feb 2026 15:37:13 -0600
Subject: [PATCH 1/3] Remove CUPTI and cuda-pathfinder from cuda-bench dependencies

---
 python/cuda/bench/__init__.py      | 8 --------
 python/cuda/bench/__init__.pyi     | 6 ------
 python/examples/auto_throughput.py | 1 -
 python/pyproject.toml              | 8 ++------
 python/src/py_nvbench.cpp          | 6 ------
 5 files changed, 2 insertions(+), 27 deletions(-)

diff --git a/python/cuda/bench/__init__.py b/python/cuda/bench/__init__.py
index c02c14c..f3f7201 100644
--- a/python/cuda/bench/__init__.py
+++ b/python/cuda/bench/__init__.py
@@ -18,10 +18,6 @@ import importlib
 import importlib.metadata
 import warnings
 
-from cuda.pathfinder import (  # type: ignore[import-not-found]
-    load_nvidia_dynamic_lib,
-)
-
 try:
     __version__ = importlib.metadata.version("cuda-bench")
 except Exception as e:
@@ -65,10 +61,6 @@ except ImportError as e:
         f"Original error: {e}"
     )
 
-# Load required NVIDIA libraries
-for libname in ("cupti", "nvperf_target", "nvperf_host"):
-    load_nvidia_dynamic_lib(libname)
-
 # Import and expose all public symbols from the CUDA-specific extension
 Benchmark = _nvbench_module.Benchmark
 CudaStream = _nvbench_module.CudaStream
diff --git a/python/cuda/bench/__init__.pyi b/python/cuda/bench/__init__.pyi
index 86681fc..25f7fca 100644
--- a/python/cuda/bench/__init__.pyi
+++ b/python/cuda/bench/__init__.pyi
@@ -240,12 +240,6 @@ class State:
     def set_blocking_kernel_timeout(self, duration: SupportsFloat) -> None:
         "Set time-out value for execution of blocking kernel, in seconds"
         ...
-    def collect_cupti_metrics(self) -> None:
-        "Request NVBench to record CUPTI metrics while running benchmark for this configuration"
-        ...
-    def is_cupti_required(self) -> bool:
-        "True if (some) CUPTI metrics are being collected"
-        ...
     def exec(
         self,
         fn: Callable[[Launch], None],
diff --git a/python/examples/auto_throughput.py b/python/examples/auto_throughput.py
index db4fa19..5d41b09 100644
--- a/python/examples/auto_throughput.py
+++ b/python/examples/auto_throughput.py
@@ -51,7 +51,6 @@ def throughput_bench(state: bench.State) -> None:
     out_arr = cuda.device_array(elements * ipt, dtype=np.int32, stream=alloc_stream)
 
     state.add_element_count(elements, column_name="Elements")
-    state.collect_cupti_metrics()
 
     threads_per_block = 256
     blocks_in_grid = (elements + threads_per_block - 1) // threads_per_block
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 4d288ac..f7ddf78 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -21,19 +21,15 @@ classifiers = [
   "Operating System :: POSIX :: Linux",
 ]
 requires-python = ">=3.10"
-dependencies = [
-  # pathfinder for finding CUDA libraries
-  "cuda-pathfinder",
-]
 dynamic = ["version"]
 readme = { file = "README.md", content-type = "text/markdown" }
 
 [project.optional-dependencies]
 # CUDA 12.x dependencies
-cu12 = ["cuda-bindings>=12.0.0,<13.0.0", "nvidia-cuda-cupti-cu12"]
+cu12 = ["cuda-bindings>=12.0.0,<13.0.0"]
 
 # CUDA 13.x dependencies
-cu13 = ["cuda-bindings>=13.0.0,<14.0.0", "nvidia-cuda-cupti>=13.0"]
+cu13 = ["cuda-bindings>=13.0.0,<14.0.0"]
 
 # Test dependencies for CUDA 12
 test-cu12 = ["cuda-bench[cu12]", "pytest", "cupy-cuda12x", "numba"]
diff --git a/python/src/py_nvbench.cpp b/python/src/py_nvbench.cpp
index 68a2f26..67c20e3e 100644
--- a/python/src/py_nvbench.cpp
+++ b/python/src/py_nvbench.cpp
@@ -550,13 +550,11 @@ PYBIND11_MODULE(PYBIND11_MODULE_NAME, m)
   //        nvbench::state::collect_stores_efficiency
   //        nvbench::state::collect_loads_efficiency
   //        nvbench::state::collect_dram_throughput
-  //        nvbench::state::collect_cupti_metrics
   //        nvbench::state::is_l1_hit_rate_collected
   //        nvbench::state::is_l2_hit_rate_collected
   //        nvbench::state::is_stores_efficiency_collected
   //        nvbench::state::is_loads_efficiency_collected
   //        nvbench::state::is_dram_throughput_collected
-  //        nvbench::state::is_cupti_required
   //        nvbench::state::add_summary
   //        nvbench::state::get_summary
   //        nvbench::state::get_summaries
@@ -678,10 +676,6 @@ PYBIND11_MODULE(PYBIND11_MODULE_NAME, m)
                   &nvbench::state::set_blocking_kernel_timeout,
                   py::arg("duration"));
 
-  pystate_cls.def("collect_cupti_metrics", &nvbench::state::collect_cupti_metrics);
-
-  pystate_cls.def("is_cupti_required", &nvbench::state::is_cupti_required);
-
   pystate_cls.def(
     "exec",
     [](nvbench::state &state, py::object py_launcher_fn, bool batched, bool sync) {

From f2d57301043ed712c6830213111fadd00d95876b Mon Sep 17 00:00:00 2001
From: Nader Al Awar <naderalawar@gmail.com>
Date: Mon, 2 Feb 2026 16:03:15 -0600
Subject: [PATCH 2/3] Disable CUPTI in cmake file

---
 python/CMakeLists.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 7f8548c..6d37350 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -19,7 +19,9 @@ include(${_cpm_download_location})
 CPMAddPackage(
    NAME nvbench
    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..
-   OPTIONS "NVBench_INSTALL_RULES ON"
+   OPTIONS
+     "NVBench_INSTALL_RULES ON"
+     "NVBench_ENABLE_CUPTI OFF"
    FIND_PACKAGE_ARGS CONFIG REQUIRED
 )
 

From 4fa4296810b97f4349f954e588fcaecb6b47f945 Mon Sep 17 00:00:00 2001
From: Nader Al Awar <naderalawar@gmail.com>
Date: Mon, 2 Feb 2026 16:43:45 -0600
Subject: [PATCH 3/3] Remove stale load_nvidia_dynamic_lib from cleanup del

---
 python/cuda/bench/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cuda/bench/__init__.py b/python/cuda/bench/__init__.py
index f3f7201..2a3aa0a 100644
--- a/python/cuda/bench/__init__.py
+++ b/python/cuda/bench/__init__.py
@@ -77,7 +77,6 @@ _nvbench = _nvbench_module
 
 # Clean up internal symbols
 del (
-    load_nvidia_dynamic_lib,
     _nvbench_module,
     _cuda_major,
     _extra_name,