Remove cupti from cuda-bench dependencies

This commit is contained in:
Nader Al Awar
2026-02-02 15:37:13 -06:00
parent 34a089f805
commit 6df5fc8c67
5 changed files with 2 additions and 27 deletions

View File

@@ -18,10 +18,6 @@ import importlib
import importlib.metadata
import warnings
from cuda.pathfinder import ( # type: ignore[import-not-found]
load_nvidia_dynamic_lib,
)
try:
__version__ = importlib.metadata.version("cuda-bench")
except Exception as e:
@@ -65,10 +61,6 @@ except ImportError as e:
f"Original error: {e}"
)
# Load required NVIDIA libraries
for libname in ("cupti", "nvperf_target", "nvperf_host"):
load_nvidia_dynamic_lib(libname)
# Import and expose all public symbols from the CUDA-specific extension
Benchmark = _nvbench_module.Benchmark
CudaStream = _nvbench_module.CudaStream

View File

@@ -240,12 +240,6 @@ class State:
def set_blocking_kernel_timeout(self, duration: SupportsFloat) -> None:
"Set time-out value for execution of blocking kernel, in seconds"
...
def collect_cupti_metrics(self) -> None:
"Request NVBench to record CUPTI metrics while running benchmark for this configuration"
...
def is_cupti_required(self) -> bool:
"True if (some) CUPTI metrics are being collected"
...
def exec(
self,
fn: Callable[[Launch], None],

View File

@@ -51,7 +51,6 @@ def throughput_bench(state: bench.State) -> None:
out_arr = cuda.device_array(elements * ipt, dtype=np.int32, stream=alloc_stream)
state.add_element_count(elements, column_name="Elements")
state.collect_cupti_metrics()
threads_per_block = 256
blocks_in_grid = (elements + threads_per_block - 1) // threads_per_block

View File

@@ -21,19 +21,15 @@ classifiers = [
"Operating System :: POSIX :: Linux",
]
requires-python = ">=3.10"
dependencies = [
# pathfinder for finding CUDA libraries
"cuda-pathfinder",
]
dynamic = ["version"]
readme = { file = "README.md", content-type = "text/markdown" }
[project.optional-dependencies]
# CUDA 12.x dependencies
cu12 = ["cuda-bindings>=12.0.0,<13.0.0", "nvidia-cuda-cupti-cu12"]
cu12 = ["cuda-bindings>=12.0.0,<13.0.0"]
# CUDA 13.x dependencies
cu13 = ["cuda-bindings>=13.0.0,<14.0.0", "nvidia-cuda-cupti>=13.0"]
cu13 = ["cuda-bindings>=13.0.0,<14.0.0"]
# Test dependencies for CUDA 12
test-cu12 = ["cuda-bench[cu12]", "pytest", "cupy-cuda12x", "numba"]

View File

@@ -550,13 +550,11 @@ PYBIND11_MODULE(PYBIND11_MODULE_NAME, m)
// nvbench::state::collect_stores_efficiency
// nvbench::state::collect_loads_efficiency
// nvbench::state::collect_dram_throughput
// nvbench::state::collect_cupti_metrics
// nvbench::state::is_l1_hit_rate_collected
// nvbench::state::is_l2_hit_rate_collected
// nvbench::state::is_stores_efficiency_collected
// nvbench::state::is_loads_efficiency_collected
// nvbench::state::is_dram_throughput_collected
// nvbench::state::is_cupti_required
// nvbench::state::add_summary
// nvbench::state::get_summary
// nvbench::state::get_summaries
@@ -678,10 +676,6 @@ PYBIND11_MODULE(PYBIND11_MODULE_NAME, m)
&nvbench::state::set_blocking_kernel_timeout,
py::arg("duration"));
pystate_cls.def("collect_cupti_metrics", &nvbench::state::collect_cupti_metrics);
pystate_cls.def("is_cupti_required", &nvbench::state::is_cupti_required);
pystate_cls.def(
"exec",
[](nvbench::state &state, py::object py_launcher_fn, bool batched, bool sync) {