diff --git a/README.md b/README.md
index 4f975d9..202ed73 100644
--- a/README.md
+++ b/README.md
@@ -73,6 +73,7 @@ various NVBench features and usecases:
 - [Reporting item/sec and byte/sec throughput statistics](examples/throughput.cu)
 - [Skipping benchmark configurations](examples/skip.cu)
 - [Benchmarking on a specific stream](examples/stream.cu)
+- [Adding / hiding columns (summaries) in markdown output](examples/summaries.cu)
 - [Benchmarks that sync CUDA devices: `nvbench::exec_tag::sync`](examples/exec_tag_sync.cu)
 - [Manual timing: `nvbench::exec_tag::timer`](examples/exec_tag_timer.cu)
 
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index b3e635c..8a89b31 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -8,6 +8,7 @@ set(example_srcs
   exec_tag_timer.cu
   skip.cu
   stream.cu
+  summaries.cu
   throughput.cu
 )
 
diff --git a/examples/summaries.cu b/examples/summaries.cu
new file mode 100644
index 0000000..57428c4
--- /dev/null
+++ b/examples/summaries.cu
@@ -0,0 +1,77 @@
+/*
+ *  Copyright 2025 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 with the LLVM exception
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.
+ *
+ *  You may obtain a copy of the License at
+ *
+ *      http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <nvbench/nvbench.cuh>
+
+// Grab some testing kernels from NVBench:
+#include <nvbench/test_kernels.cuh>
+
+// #define PRINT_DEFAULT_SUMMARY_TAGS
+
+#ifdef PRINT_DEFAULT_SUMMARY_TAGS
+#include <iostream> // std::cout, used only by the optional tag dump in summary_example
+#endif
+
+// Example benchmark: registers a custom "duration" summary holding the requested sleep time,
+// launches nvbench::sleep_kernel for that long, then un-hides the min/max cold-time summaries
+// and hides the mean ones so the markdown table shows a different set of columns.
+void summary_example(nvbench::state &state)
+{
+  // Fetch parameters and compute duration in seconds:
+  const auto ms       = static_cast<nvbench::float64_t>(state.get_int64("ms"));
+  const auto us       = static_cast<nvbench::float64_t>(state.get_int64("us"));
+  const auto duration = ms * 1e-3 + us * 1e-6;
+
+  // Add a new column to the summary table with the derived duration used by the benchmark.
+  // See the documentation in nvbench/summary.cuh for more details.
+  {
+    nvbench::summary &summary = state.add_summary("duration");
+    summary.set_string("name", "Duration (s)");
+    summary.set_string("description", "The duration of the kernel execution.");
+    summary.set_string("hint", "duration");
+    summary.set_float64("value", duration);
+  }
+
+  // Run the measurements:
+  state.exec([duration](nvbench::launch &launch) {
+    nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(duration);
+  });
+
+#ifdef PRINT_DEFAULT_SUMMARY_TAGS
+  // The default summary tags can be found by inspecting the state after calling
+  // state.exec.
+  // They can also be found by looking at the json output (--json <filename>)
+  for (const auto &summary : state.get_summaries())
+  {
+    std::cout << summary.get_tag() << '\n';
+  }
+#endif
+
+  // Default summary columns can be shown/hidden in the markdown output tables by adding/removing
+  // the "hide" key. Modify this benchmark to show the minimum and maximum times, but hide the
+  // means.
+  state.get_summary("nv/cold/time/gpu/min").remove_value("hide");
+  state.get_summary("nv/cold/time/gpu/max").remove_value("hide");
+  state.get_summary("nv/cold/time/gpu/mean").set_string("hide", "");
+  state.get_summary("nv/cold/time/cpu/min").remove_value("hide");
+  state.get_summary("nv/cold/time/cpu/max").remove_value("hide");
+  state.get_summary("nv/cold/time/cpu/mean").set_string("hide", "");
+}
+NVBENCH_BENCH(summary_example)
+  .add_int64_axis("ms", nvbench::range(10, 50, 20))
+  .add_int64_axis("us", nvbench::range(100, 500, 200));