From 76cbbcc8f92ce0b8f8c01b5165d7ae456e262fe8 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 4 Feb 2022 17:20:40 -0500 Subject: [PATCH] Update benchmarks.md --- docs/benchmarks.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 09820f9..5ec0c22 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -11,6 +11,18 @@ void my_benchmark(nvbench::state& state) { NVBENCH_BENCH(my_benchmark); ``` +The following example shows how to benchmark functions that do not expose stream parameters: +```cpp +void my_benchmark(nvbench::state& state) { + state.set_cuda_stream(nvbench::cuda_stream{cudaStreamDefault, false}); + state.exec([](nvbench::launch&) { + my_func(); // a host API invoking GPU kernels without taking an explicit stream + my_kernel<<<num_blocks, 256>>>(); // or a kernel launched with the default stream + }); +} +NVBENCH_BENCH(my_benchmark); +``` + There are three main components in the definition of a benchmark: - A `KernelGenerator` callable (`my_benchmark` above)