From 584f48ac977475a66a2b0d285a4b58f4038c355e Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com>
Date: Mon, 4 Aug 2025 12:14:44 -0500
Subject: [PATCH] Remove warm-up invocations outside of launcher in
 examples/throughput and auto_throughput

---
 python/examples/auto_throughput.py | 7 -------
 python/examples/throughput.py      | 7 -------
 2 files changed, 14 deletions(-)

diff --git a/python/examples/auto_throughput.py b/python/examples/auto_throughput.py
index 1b6e663..88691ec 100644
--- a/python/examples/auto_throughput.py
+++ b/python/examples/auto_throughput.py
@@ -58,13 +58,6 @@ def throughput_bench(state: nvbench.State) -> None:
 
     krn = make_throughput_kernel(ipt)
 
-    # warm-up call ensures that kernel is loaded into context
-    # before blocking kernel is launched. Kernel loading may cause
-    # a synchronization to occur.
-    krn[blocks_in_grid, threads_per_block, alloc_stream, 0](
-        stride, elements, inp_arr, out_arr
-    )
-
     def launcher(launch: nvbench.Launch):
         exec_stream = as_cuda_stream(launch.get_stream())
         krn[blocks_in_grid, threads_per_block, exec_stream, 0](
diff --git a/python/examples/throughput.py b/python/examples/throughput.py
index 5984126..890c372 100644
--- a/python/examples/throughput.py
+++ b/python/examples/throughput.py
@@ -59,13 +59,6 @@ def throughput_bench(state: nvbench.State) -> None:
 
     krn = make_throughput_kernel(ipt)
 
-    # warm-up call ensures that kernel is loaded into context
-    # before blocking kernel is launched. Kernel loading may
-    # cause synchronization to occur.
-    krn[blocks_in_grid, threads_per_block, alloc_stream, 0](
-        stride, elements, inp_arr, out_arr
-    )
-
     def launcher(launch: nvbench.Launch):
         exec_stream = as_cuda_stream(launch.get_stream())
         krn[blocks_in_grid, threads_per_block, exec_stream, 0](