Remove warm-up invocations outside of launcher in examples/throughput and auto_throughput

This commit is contained in:
Oleksandr Pavlyk
2025-08-04 12:14:44 -05:00
parent d8b0acc8d4
commit 584f48ac97
2 changed files with 0 additions and 14 deletions

View File

@@ -58,13 +58,6 @@ def throughput_bench(state: nvbench.State) -> None:
krn = make_throughput_kernel(ipt)
# warm-up call ensures that kernel is loaded into context
# before blocking kernel is launched. Kernel loading may cause
# a synchronization to occur.
krn[blocks_in_grid, threads_per_block, alloc_stream, 0](
stride, elements, inp_arr, out_arr
)
def launcher(launch: nvbench.Launch):
exec_stream = as_cuda_stream(launch.get_stream())
krn[blocks_in_grid, threads_per_block, exec_stream, 0](

View File

@@ -59,13 +59,6 @@ def throughput_bench(state: nvbench.State) -> None:
krn = make_throughput_kernel(ipt)
# warm-up call ensures that kernel is loaded into context
# before blocking kernel is launched. Kernel loading may
# cause synchronization to occur.
krn[blocks_in_grid, threads_per_block, alloc_stream, 0](
stride, elements, inp_arr, out_arr
)
def launcher(launch: nvbench.Launch):
exec_stream = as_cuda_stream(launch.get_stream())
krn[blocks_in_grid, threads_per_block, exec_stream, 0](