Fix type annotations in cuda.nvbench and in examples

This commit is contained in:
Oleksandr Pavlyk
2025-07-22 13:02:22 -05:00
parent 13ad115ca3
commit a535a1d173
6 changed files with 52 additions and 43 deletions

View File

@@ -15,7 +15,6 @@
# limitations under the License.
import sys
from collections.abc import Callable
import cuda.nvbench as nvbench
import numpy as np
@@ -26,7 +25,7 @@ def as_cuda_Stream(cs: nvbench.CudaStream) -> cuda.cudadrv.driver.Stream:
return cuda.external_stream(cs.addressof())
def make_kernel(items_per_thread: int) -> Callable:
def make_kernel(items_per_thread: int) -> cuda.compiler.AutoJitCUDAKernel:
@cuda.jit
def kernel(stride: np.uintp, elements: np.uintp, in_arr, out_arr):
tid = cuda.grid(1)

View File

@@ -1,6 +1,6 @@
import ctypes
import sys
from typing import Optional
from typing import Dict, Optional, Tuple
import cuda.cccl.headers as headers
import cuda.core.experimental as core
@@ -134,7 +134,7 @@ def copy_sweep_grid_shape(state: nvbench.State):
def copy_type_sweep(state: nvbench.State):
type_id = state.get_int64("TypeID")
types_map = {
types_map: Dict[int, Tuple[type, str]] = {
0: (ctypes.c_uint8, "cuda::std::uint8_t"),
1: (ctypes.c_uint16, "cuda::std::uint16_t"),
2: (ctypes.c_uint32, "cuda::std::uint32_t"),
@@ -148,7 +148,7 @@ def copy_type_sweep(state: nvbench.State):
# Number of elements in 256MiB
nbytes = 256 * 1024 * 1024
num_values = nbytes // ctypes.sizeof(value_ctype(0))
num_values = nbytes // ctypes.sizeof(value_ctype)
state.add_element_count(num_values)
state.add_global_memory_reads(nbytes)

View File

@@ -27,7 +27,7 @@ def as_cccl_Stream(cs: nvbench.CudaStream) -> CCCLStream:
def as_cp_ExternalStream(
cs: nvbench.CudaStream, dev_id: int = -1
cs: nvbench.CudaStream, dev_id: int | None = -1
) -> cp.cuda.ExternalStream:
h = cs.addressof()
return cp.cuda.ExternalStream(h, dev_id)

View File

@@ -5,7 +5,7 @@ import cupy as cp
def as_cp_ExternalStream(
cs: nvbench.CudaStream, dev_id: int = -1
cs: nvbench.CudaStream, dev_id: int | None = -1
) -> cp.cuda.ExternalStream:
h = cs.addressof()
return cp.cuda.ExternalStream(h, dev_id)

View File

@@ -15,7 +15,6 @@
# limitations under the License.
import sys
from collections.abc import Callable
import cuda.nvbench as nvbench
import numpy as np
@@ -26,7 +25,7 @@ def as_cuda_Stream(cs: nvbench.CudaStream) -> cuda.cudadrv.driver.Stream:
return cuda.external_stream(cs.addressof())
def make_kernel(items_per_thread: int) -> Callable:
def make_kernel(items_per_thread: int) -> cuda.compiler.AutoJitCUDAKernel:
@cuda.jit
def kernel(stride: np.uintp, elements: np.uintp, in_arr, out_arr):
tid = cuda.grid(1)