# nvbench/python/cuda/bench/__init__.pyi

# Copyright 2025-2026 NVIDIA Corporation
#
#  Licensed under the Apache License, Version 2.0 with the LLVM exception
#  (the "License"); you may not use this file except in compliance with
#  the License.
#
#  You may obtain a copy of the License at
#
#      http://llvm.org/foundation/relicensing/LICENSE.txt
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

# ============================================
# PLEASE KEEP IN SYNC WITH py_nvbench.cpp FILE
# ============================================
# Please be sure to keep these type hints and docstring in sync
# with the pybind11 bindings in ``../../src/py_nvbench.cpp``

# Use mypy's stubgen to auto-generate reference stubs with
# ``stubgen -m cuda.bench._nvbench``, then compare the generated
# ``out/cuda/bench/_nvbench.pyi`` against the definitions given here.

from array import array
from collections.abc import Callable, Iterator, Sequence
from typing import (
    Any,
    Optional,
    Self,
    SupportsFloat,
    SupportsInt,
    TypeVar,
    Union,
    overload,
)

# Generic result type of a user-supplied estimator callable; it flows through
# to the return annotations of the ``center*`` / ``centers*`` methods below.
ResultT = TypeVar("ResultT")
# A single scalar summary entry: an int, a float, or a string.
_SummaryValue = int | float | str
# Either one scalar summary entry or a string-keyed mapping of such entries;
# used as the value type of ``SubBenchState.summaries``.
_SummaryData = _SummaryValue | dict[str, _SummaryValue]

class CudaStream:
    """Stream type returned by ``Launch.get_stream`` and ``State.get_stream``."""

    def __cuda_stream__(self) -> tuple[int, int]:
        """Return a pair of integers."""
        # NOTE(review): presumably the (protocol version, stream handle) pair
        # of the CUDA stream protocol -- confirm against py_nvbench.cpp.
        ...
    def addressof(self) -> int:
        """Return an integer."""
        # NOTE(review): presumably the address of the underlying stream
        # handle -- confirm against py_nvbench.cpp.
        ...

class Benchmark:
    """Benchmark handle type returned by ``register`` and ``State.get_benchmark``.

    Every ``add_*`` and ``set_*`` method is annotated as returning ``Self``,
    so configuration calls can be chained. Numeric parameters are typed with
    ``SupportsInt`` / ``SupportsFloat``, i.e. any object implementing
    ``__int__`` / ``__float__`` type-checks.
    """

    def get_name(self) -> str: ...

    # Axis declarations: each takes an axis name and a sequence of values.
    def add_int64_axis(self, name: str, values: Sequence[SupportsInt]) -> Self: ...
    def add_int64_power_of_two_axis(
        self, name: str, values: Sequence[SupportsInt]
    ) -> Self: ...
    def add_float64_axis(self, name: str, values: Sequence[SupportsFloat]) -> Self: ...
    def add_string_axis(self, name: str, values: Sequence[str]) -> Self: ...

    # Configuration setters.
    # NOTE(review): the units implied by the ``*_seconds`` parameter names and
    # the meaning of ``threshold`` are not visible here -- confirm against
    # py_nvbench.cpp before documenting them further.
    def set_name(self, name: str) -> Self: ...
    def set_run_once(self, v: bool) -> Self: ...
    def set_skip_time(self, duration_seconds: SupportsFloat) -> Self: ...
    def set_throttle_recovery_delay(self, delay_seconds: SupportsFloat) -> Self: ...
    def set_throttle_threshold(self, threshold: SupportsFloat) -> Self: ...
    def set_timeout(self, duration_seconds: SupportsFloat) -> Self: ...

    # Stopping criterion: selected by name, with typed name/value parameters.
    def set_stopping_criterion(self, criterion: str) -> Self: ...
    def set_criterion_param_float64(self, name: str, value: SupportsFloat) -> Self: ...
    def set_criterion_param_int64(self, name: str, value: SupportsInt) -> Self: ...
    def set_criterion_param_string(self, name: str, value: str) -> Self: ...

    # ``State`` declares same-named ``set_run_once`` / ``set_throttle_threshold``
    # / ``set_timeout`` / ``set_min_samples`` methods; those return ``None``
    # and use different parameter names.
    def set_min_samples(self, count: SupportsInt) -> Self: ...
    def set_is_cpu_only(self, is_cpu_only: bool) -> Self: ...

class Launch:
    """Argument type of the callable passed to ``State.exec``."""

    def get_stream(self) -> CudaStream:
        """Return a ``CudaStream``."""
        ...

class State:
    """State type accepted by the callable passed to ``register``.

    This class only declares signatures; the behavior lives in the compiled
    bindings. Numeric parameters are typed with ``SupportsInt`` /
    ``SupportsFloat``, i.e. any object implementing ``__int__`` /
    ``__float__`` type-checks.
    """

    # Device and stream queries. ``get_device`` may return ``None``.
    def has_device(self) -> bool: ...
    def has_printers(self) -> bool: ...
    def get_device(self) -> Union[int, None]: ...
    def get_stream(self) -> CudaStream: ...

    # Typed lookups by name. The ``*_or_default`` variants additionally take
    # a ``default_value`` of the matching type.
    # NOTE(review): presumably ``name`` is an axis name and ``default_value``
    # is returned when that axis is absent -- confirm against py_nvbench.cpp.
    def get_int64(self, name: str) -> int: ...
    def get_int64_or_default(self, name: str, default_value: SupportsInt) -> int: ...
    def get_float64(self, name: str) -> float: ...
    def get_float64_or_default(
        self, name: str, default_value: SupportsFloat
    ) -> float: ...
    def get_string(self, name: str) -> str: ...
    def get_string_or_default(self, name: str, default_value: str) -> str: ...

    # Element-count bookkeeping.
    # NOTE(review): ``column_name`` defaults to ``None`` here but to ``""`` in
    # ``add_global_memory_reads`` / ``add_global_memory_writes`` -- confirm
    # which default the bindings actually declare.
    def add_element_count(
        self, count: SupportsInt, column_name: Optional[str] = None
    ) -> None: ...
    def set_element_count(self, count: SupportsInt) -> None: ...
    def get_element_count(self) -> int: ...

    # Skip handling: ``skip`` takes a reason string; the two queries expose a
    # flag and a reason string.
    def skip(self, reason: str) -> None: ...
    def is_skipped(self) -> bool: ...
    def get_skip_reason(self) -> str: ...

    # Global-memory traffic counters. ``nbytes`` is positional-only (the ``/``
    # marker); ``column_name`` defaults to the empty string.
    def add_global_memory_reads(
        self, nbytes: SupportsInt, /, column_name: str = ""
    ) -> None: ...
    def add_global_memory_writes(
        self, nbytes: SupportsInt, /, column_name: str = ""
    ) -> None: ...

    def get_benchmark(self) -> Benchmark: ...

    # Getter/setter pairs. Unlike the same-named ``Benchmark`` setters, the
    # setters here return ``None`` rather than ``Self``.
    def get_throttle_threshold(self) -> float: ...
    def set_throttle_threshold(self, threshold_fraction: SupportsFloat) -> None: ...
    def get_min_samples(self) -> int: ...
    def set_min_samples(self, min_samples_count: SupportsInt) -> None: ...
    def get_disable_blocking_kernel(self) -> bool: ...
    def set_disable_blocking_kernel(self, flag: bool) -> None: ...
    def get_run_once(self) -> bool: ...
    def set_run_once(self, run_once_flag: bool) -> None: ...
    def get_timeout(self) -> float: ...
    def set_timeout(self, duration: SupportsFloat) -> None: ...
    def get_blocking_kernel_timeout(self) -> float: ...
    def set_blocking_kernel_timeout(self, duration: SupportsFloat) -> None: ...

    def exec(
        self,
        fn: Callable[[Launch], None],
        /,
        *,
        batched: Optional[bool] = True,
        sync: Optional[bool] = False,
    ) -> None:
        """Take ``fn``, a callable that accepts a ``Launch`` and returns ``None``.

        ``fn`` is positional-only; ``batched`` (default ``True``) and ``sync``
        (default ``False``) are keyword-only.
        """
        # An unannotated return in a stub is treated as ``Any`` by type
        # checkers; it is annotated ``None`` here like every other mutator in
        # this class.
        # NOTE(review): confirm the binding returns nothing, and whether
        # ``None`` is really accepted for ``batched`` / ``sync`` as the
        # ``Optional[bool]`` annotations state -- check py_nvbench.cpp.
        ...

    def get_short_description(self) -> str: ...

    # ``value`` may be integer-like, float-like, or a string.
    def add_summary(
        self, column_name: str, value: Union[SupportsInt, SupportsFloat, str]
    ) -> None: ...

    # String-keyed mapping whose values are ints, floats, or strings.
    def get_axis_values(self) -> dict[str, int | float | str]: ...
    # ``color`` has a default in the bindings; its value is elided (``...``).
    def get_axis_values_as_string(self, color: bool = ...) -> str: ...
    def get_stopping_criterion(self) -> str: ...

def register(fn: Callable[[State], None]) -> Benchmark:
    """Take a callable that accepts a ``State`` and return a ``Benchmark``.

    The returned ``Benchmark`` exposes chainable ``add_*`` / ``set_*`` methods.
    """
    # NOTE(review): presumably ``fn`` becomes the benchmark body that
    # ``run_all_benchmarks`` later invokes -- confirm against py_nvbench.cpp.
    ...
def run_all_benchmarks(argv: Sequence[str]) -> None:
    """Take a sequence of argument strings and return ``None``."""
    # NOTE(review): presumably ``argv`` is a command line in ``sys.argv``
    # form and failures surface as ``NVBenchRuntimeError`` -- confirm against
    # py_nvbench.cpp.
    ...

# Exception type derived from the built-in ``RuntimeError``, so handlers that
# catch ``RuntimeError`` also catch it.
# NOTE(review): the conditions under which the bindings raise it are not
# visible in this stub -- confirm against py_nvbench.cpp.
class NVBenchRuntimeError(RuntimeError): ...

class SubBenchState:
    """Element type of ``SubBenchResult.states``.

    ``array`` in the annotations below is the standard-library ``array.array``.
    """

    # Name string; ``name()`` below also returns a ``str``.
    state_name: str
    # String-keyed summaries; each value is a scalar (int/float/str) or a
    # string-keyed mapping of scalars.
    summaries: dict[str, _SummaryData]
    # Optional arrays; either may be ``None``.
    # NOTE(review): presumably per-sample timings and matching clock
    # frequencies -- confirm against the implementation.
    samples: array | None
    frequencies: array | None
    # Optional float. NOTE(review): presumably a bandwidth figure -- confirm.
    bw: float | None
    # String-to-string mapping.
    # NOTE(review): presumably axis name -> axis value for this state -- confirm.
    point: dict[str, str]
    def name(self) -> str: ...
    def center(self, estimator: Callable[[array], ResultT]) -> ResultT | None:
        """Take an estimator over one array; return its result type or ``None``."""
        # NOTE(review): presumably ``None`` is returned when ``samples`` is
        # ``None`` -- confirm against the implementation.
        ...
    def center_with_frequencies(
        self, estimator: Callable[[array, array], ResultT]
    ) -> ResultT | None:
        """Take an estimator over two arrays; return its result type or ``None``."""
        # NOTE(review): presumably the two arrays are ``samples`` and
        # ``frequencies``, in that order -- confirm against the implementation.
        ...

class SubBenchResult:
    """Container of ``SubBenchState`` objects; value type of ``BenchResult.subbenches``.

    Supports ``len()``, iteration, and indexing by integer or by slice.
    """

    states: list[SubBenchState]
    def __len__(self) -> int: ...
    # An integer index yields one state; a slice yields a list of states.
    @overload
    def __getitem__(self, state_index: int) -> SubBenchState: ...
    @overload
    def __getitem__(self, state_index: slice) -> list[SubBenchState]: ...
    def __iter__(self) -> Iterator[SubBenchState]: ...
    def centers(
        self, estimator: Callable[[array], ResultT]
    ) -> dict[str, ResultT | None]:
        """Take an estimator over one array; return a string-keyed result mapping.

        Each value is the estimator's result type or ``None``.
        """
        # NOTE(review): presumably keyed by state name, one entry per element
        # of ``states`` -- confirm against the implementation.
        ...
    def centers_with_frequencies(
        self, estimator: Callable[[array, array], ResultT]
    ) -> dict[str, ResultT | None]:
        """Take an estimator over two arrays; return a string-keyed result mapping.

        Each value is the estimator's result type or ``None``.
        """
        ...

class BenchResult:
    """Top-level result container holding ``SubBenchResult`` objects by name."""

    # Untyped (``Any``) metadata; also accepted as a keyword by ``__init__``.
    metadata: Any
    # String-keyed mapping of sub-benchmark results.
    subbenches: dict[str, SubBenchResult]
    def __init__(
        self,
        json_fn: str | None = None,
        *,
        metadata: Any = None,
        parse: bool = True,
    ) -> None:
        """Construct a result object.

        ``json_fn`` is an optional string defaulting to ``None``; ``metadata``
        (default ``None``) and ``parse`` (default ``True``) are keyword-only.
        """
        # NOTE(review): presumably ``json_fn`` is the path of an NVBench JSON
        # output file that is read when ``parse`` is true -- confirm against
        # the implementation.
        ...
    def __getitem__(self, subbench_name: str) -> SubBenchResult: ...
    def centers(
        self, estimator: Callable[[array], ResultT]
    ) -> dict[str, dict[str, ResultT | None]]:
        """Take an estimator over one array; return a two-level string-keyed mapping.

        Each innermost value is the estimator's result type or ``None``.
        """
        # NOTE(review): presumably the outer key is the sub-benchmark name and
        # the inner mapping is ``SubBenchResult.centers`` -- confirm.
        ...
    def centers_with_frequencies(
        self, estimator: Callable[[array, array], ResultT]
    ) -> dict[str, dict[str, ResultT | None]]:
        """Take an estimator over two arrays; return a two-level string-keyed mapping.

        Each innermost value is the estimator's result type or ``None``.
        """
        ...