diff --git a/python/cuda/bench/__init__.py b/python/cuda/bench/__init__.py
index 7eb4fb0..8214f5b 100644
--- a/python/cuda/bench/__init__.py
+++ b/python/cuda/bench/__init__.py
@@ -18,8 +18,6 @@
 import importlib
 import importlib.metadata
 import warnings
 
-from ._bench_result import BenchmarkResult, SubBenchResult, SubBenchState
-
 try:
     __version__ = importlib.metadata.version("cuda-bench")
 except Exception as e:
@@ -31,10 +29,6 @@ except Exception as e:
     )
 
 
-BenchmarkResult.__module__ = __name__
-SubBenchResult.__module__ = __name__
-SubBenchState.__module__ = __name__
-
 _NVBENCH_EXPORTS = (
     "Benchmark",
     "CudaStream",
@@ -51,9 +45,6 @@ _NVBENCH_TEST_EXPORTS = (
 )
 
 __all__ = [
-    "BenchmarkResult",
-    "SubBenchResult",
-    "SubBenchState",
     *_NVBENCH_EXPORTS,
 ]
diff --git a/python/cuda/bench/__init__.pyi b/python/cuda/bench/__init__.pyi
index 8773d1a..9e0d264 100644
--- a/python/cuda/bench/__init__.pyi
+++ b/python/cuda/bench/__init__.pyi
@@ -25,31 +25,18 @@
 # stubs in generated out/cuda/nvbench/_nvbench.pyi
 # with definitions given here.
 
-from array import array
 from collections.abc import (
     Callable,
-    ItemsView,
-    Iterator,
-    KeysView,
     Sequence,
-    ValuesView,
 )
-from os import PathLike
 from typing import (
-    Any,
     Optional,
     Self,
     SupportsFloat,
     SupportsInt,
-    TypeVar,
     Union,
-    overload,
 )
 
-ResultT = TypeVar("ResultT")
-_SummaryValue = int | float | str
-_SummaryData = _SummaryValue | dict[str, _SummaryValue]
-
 class CudaStream:
     def __cuda_stream__(self) -> tuple[int, int]: ...
     def addressof(self) -> int: ...
@@ -138,60 +125,3 @@ def register(fn: Callable[[State], None]) -> Benchmark: ...
 def run_all_benchmarks(argv: Sequence[str]) -> None: ...
 
 class NVBenchRuntimeError(RuntimeError): ...
-
-class SubBenchState:
-    state_name: str
-    summaries: dict[str, _SummaryData]
-    samples: array | None
-    frequencies: array | None
-    bw: float | None
-    point: dict[str, str]
-    def name(self) -> str: ...
-    def center(self, estimator: Callable[[array], ResultT]) -> ResultT | None: ...
-    def center_with_frequencies(
-        self, estimator: Callable[[array, array], ResultT]
-    ) -> ResultT | None: ...
-
-class SubBenchResult:
-    states: list[SubBenchState]
-    def __len__(self) -> int: ...
-    @overload
-    def __getitem__(self, state_index: int) -> SubBenchState: ...
-    @overload
-    def __getitem__(self, state_index: slice) -> list[SubBenchState]: ...
-    def __iter__(self) -> Iterator[SubBenchState]: ...
-    def centers(
-        self, estimator: Callable[[array], ResultT]
-    ) -> dict[str, ResultT | None]: ...
-    def centers_with_frequencies(
-        self, estimator: Callable[[array, array], ResultT]
-    ) -> dict[str, ResultT | None]: ...
-
-class BenchmarkResult:
-    metadata: Any
-    subbenches: dict[str, SubBenchResult]
-    def __init__(
-        self,
-        *,
-        json_path: str | PathLike[str],
-        metadata: Any = None,
-    ) -> None: ...
-    @classmethod
-    def empty(cls, *, metadata: Any = None) -> Self: ...
-    @classmethod
-    def from_json(
-        cls, json_path: str | PathLike[str], *, metadata: Any = None
-    ) -> Self: ...
-    def __len__(self) -> int: ...
-    def __iter__(self) -> Iterator[str]: ...
-    def __contains__(self, subbench_name: object) -> bool: ...
-    def __getitem__(self, subbench_name: str) -> SubBenchResult: ...
-    def keys(self) -> KeysView[str]: ...
-    def values(self) -> ValuesView[SubBenchResult]: ...
-    def items(self) -> ItemsView[str, SubBenchResult]: ...
-    def centers(
-        self, estimator: Callable[[array], ResultT]
-    ) -> dict[str, dict[str, ResultT | None]]: ...
-    def centers_with_frequencies(
-        self, estimator: Callable[[array, array], ResultT]
-    ) -> dict[str, dict[str, ResultT | None]]: ...
diff --git a/python/cuda/bench/results/__init__.py b/python/cuda/bench/results/__init__.py
new file mode 100644
index 0000000..955d975
--- /dev/null
+++ b/python/cuda/bench/results/__init__.py
@@ -0,0 +1,39 @@
+# Copyright 2026 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 with the LLVM exception
+# (the "License"); you may not use this file except in compliance with
+# the License.
+#
+# You may obtain a copy of the License at
+#
+#     http://llvm.org/foundation/relicensing/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utilities for reading NVBench JSON benchmark result files."""
+
+from ._benchmark_result import (
+    BenchmarkResult,
+    BenchmarkResultDevice,
+    BenchmarkResultSummary,
+    SubBenchmarkResult,
+    SubBenchmarkState,
+)
+
+BenchmarkResult.__module__ = __name__
+BenchmarkResultDevice.__module__ = __name__
+BenchmarkResultSummary.__module__ = __name__
+SubBenchmarkResult.__module__ = __name__
+SubBenchmarkState.__module__ = __name__
+
+__all__ = [
+    "BenchmarkResult",
+    "BenchmarkResultDevice",
+    "BenchmarkResultSummary",
+    "SubBenchmarkResult",
+    "SubBenchmarkState",
+]
diff --git a/python/cuda/bench/results/__init__.pyi b/python/cuda/bench/results/__init__.pyi
new file mode 100644
index 0000000..3435895
--- /dev/null
+++ b/python/cuda/bench/results/__init__.pyi
@@ -0,0 +1,109 @@
+# Copyright 2026 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 with the LLVM exception
+# (the "License"); you may not use this file except in compliance with
+# the License.
+#
+# You may obtain a copy of the License at
+#
+#     http://llvm.org/foundation/relicensing/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from array import array
+from collections.abc import Callable, ItemsView, Iterator, KeysView, ValuesView
+from os import PathLike
+from typing import Any, TypeVar, overload
+
+ResultT = TypeVar("ResultT")
+BenchmarkResultT = TypeVar("BenchmarkResultT", bound="BenchmarkResult")
+_SummaryValue = int | float | str
+
+class BenchmarkResultDevice:
+    id: int
+    name: str
+    data: dict[str, Any]
+
+class BenchmarkResultSummary:
+    tag: str
+    name: str | None
+    hint: str | None
+    hide: str | None
+    description: str | None
+    data: dict[str, _SummaryValue]
+    @property
+    def value(self) -> _SummaryValue | None: ...
+    def __getitem__(self, key: str) -> _SummaryValue: ...
+    def get(
+        self, key: str, default: _SummaryValue | None = None
+    ) -> _SummaryValue | None: ...
+
+class SubBenchmarkState:
+    state_name: str
+    device: int | None
+    type_config_index: int | None
+    axis_values: list[dict[str, Any]]
+    is_skipped: bool
+    skip_reason: str | None
+    summaries: dict[str, BenchmarkResultSummary]
+    samples: array | None
+    frequencies: array | None
+    bw: float | None
+    point: dict[str, str]
+    def name(self) -> str: ...
+    def center(self, estimator: Callable[[array], ResultT]) -> ResultT | None: ...
+    def center_with_frequencies(
+        self, estimator: Callable[[array, array], ResultT]
+    ) -> ResultT | None: ...
+
+class SubBenchmarkResult:
+    name: str
+    devices: list[int]
+    axes: list[dict[str, Any]]
+    states: list[SubBenchmarkState]
+    def __len__(self) -> int: ...
+    @overload
+    def __getitem__(self, state_index: int) -> SubBenchmarkState: ...
+    @overload
+    def __getitem__(self, state_index: slice) -> list[SubBenchmarkState]: ...
+    def __iter__(self) -> Iterator[SubBenchmarkState]: ...
+    def centers(
+        self, estimator: Callable[[array], ResultT]
+    ) -> dict[str, ResultT | None]: ...
+    def centers_with_frequencies(
+        self, estimator: Callable[[array, array], ResultT]
+    ) -> dict[str, ResultT | None]: ...
+
+class BenchmarkResult:
+    metadata: Any
+    devices: dict[int, BenchmarkResultDevice]
+    subbenches: dict[str, SubBenchmarkResult]
+    def __init__(self, token: object | None = None) -> None: ...
+    @classmethod
+    def empty(
+        cls: type[BenchmarkResultT], *, metadata: Any = None
+    ) -> BenchmarkResultT: ...
+    @classmethod
+    def from_json(
+        cls: type[BenchmarkResultT],
+        json_path: str | PathLike[str],
+        *,
+        metadata: Any = None,
+    ) -> BenchmarkResultT: ...
+    def __len__(self) -> int: ...
+    def __iter__(self) -> Iterator[str]: ...
+    def __contains__(self, subbench_name: object) -> bool: ...
+    def __getitem__(self, subbench_name: str) -> SubBenchmarkResult: ...
+    def keys(self) -> KeysView[str]: ...
+    def values(self) -> ValuesView[SubBenchmarkResult]: ...
+    def items(self) -> ItemsView[str, SubBenchmarkResult]: ...
+    def centers(
+        self, estimator: Callable[[array], ResultT]
+    ) -> dict[str, dict[str, ResultT | None]]: ...
+    def centers_with_frequencies(
+        self, estimator: Callable[[array, array], ResultT]
+    ) -> dict[str, dict[str, ResultT | None]]: ...
diff --git a/python/cuda/bench/_bench_result.py b/python/cuda/bench/results/_benchmark_result.py
similarity index 68%
rename from python/cuda/bench/_bench_result.py
rename to python/cuda/bench/results/_benchmark_result.py
index 6072041..d0806c7 100644
--- a/python/cuda/bench/_bench_result.py
+++ b/python/cuda/bench/results/_benchmark_result.py
@@ -14,19 +14,36 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import array
 import json
 import os
 import sys
 from collections.abc import ItemsView, Iterator, KeysView, ValuesView
+from dataclasses import dataclass
 from typing import Any, Callable, TypeVar
 
-__all__ = ["BenchmarkResult", "SubBenchResult", "SubBenchState"]
+__all__ = [
+    "BenchmarkResult",
+    "BenchmarkResultDevice",
+    "BenchmarkResultSummary",
+    "SubBenchmarkResult",
+    "SubBenchmarkState",
+]
 
 ResultT = TypeVar("ResultT")
 BenchmarkResultT = TypeVar("BenchmarkResultT", bound="BenchmarkResult")
 _SummaryValue = int | float | str
-_SummaryData = _SummaryValue | dict[str, _SummaryValue]
+
+
+@dataclass(frozen=True)
+class BenchmarkResultDevice:
+    """Device metadata parsed from an NVBench JSON result file."""
+
+    id: int
+    name: str
+    data: dict[str, Any]
 
 
 def read_json(filename: str | os.PathLike[str]) -> dict:
@@ -49,13 +66,6 @@ def extract_size(summary: dict) -> int:
     return int(value_data["value"])
 
 
-def extract_bw(summary: dict) -> float:
-    summary_data = summary["data"]
-    value_data = next(filter(lambda v: v["name"] == "value", summary_data))
-    assert value_data["type"] == "float64"
-    return float(value_data["value"])
-
-
 def parse_summary_value(value_data: dict) -> _SummaryValue:
     value_type = value_data["type"]
     value = value_data["value"]
@@ -68,19 +78,48 @@ def parse_summary_value(value_data: dict) -> _SummaryValue:
     raise ValueError(f"unsupported summary value type: {value_type}")
 
 
-def parse_summary_data(summary: dict) -> _SummaryData:
-    summary_values = {
+@dataclass(frozen=True)
+class BenchmarkResultSummary:
+    """Summary record parsed from one NVBench benchmark state."""
+
+    tag: str
+    name: str | None
+    hint: str | None
+    hide: str | None
+    description: str | None
+    data: dict[str, _SummaryValue]
+
+    @property
+    def value(self) -> _SummaryValue | None:
+        return self.data.get("value")
+
+    def __getitem__(self, key: str) -> _SummaryValue:
+        return self.data[key]
+
+    def get(
+        self, key: str, default: _SummaryValue | None = None
+    ) -> _SummaryValue | None:
+        return self.data.get(key, default)
+
+
+def parse_summary(summary: dict) -> BenchmarkResultSummary:
+    data = {
         value_data["name"]: parse_summary_value(value_data)
-        for value_data in summary["data"]
+        for value_data in summary.get("data", [])
     }
-    if len(summary_values) == 1 and "value" in summary_values:
-        return summary_values["value"]
-    return summary_values
+    return BenchmarkResultSummary(
+        tag=summary["tag"],
+        name=summary.get("name"),
+        hint=summary.get("hint"),
+        hide=summary.get("hide"),
+        description=summary.get("description"),
+        data=data,
+    )
 
 
-def parse_summaries(state: dict) -> dict[str, _SummaryData]:
+def parse_summaries(state: dict) -> dict[str, BenchmarkResultSummary]:
     return {
-        summary["tag"]: parse_summary_data(summary) for summary in state["summaries"]
+        summary["tag"]: parse_summary(summary) for summary in state["summaries"] or []
     }
 
 
@@ -169,17 +208,12 @@ def parse_frequencies(state: dict, json_dir: str) -> array.array | None:
     return parse_float32_binary(frequency_count, frequencies_filename, json_dir)
 
 
-def parse_bw(state: dict) -> float | None:
-    bwutil = next(
-        filter(
-            lambda s: s["tag"] == "nv/cold/bw/global/utilization", state["summaries"]
-        ),
-        None,
-    )
-    if not bwutil:
+def parse_bw(summaries: dict[str, BenchmarkResultSummary]) -> float | None:
+    bwutil = summaries.get("nv/cold/bw/global/utilization")
+    if bwutil is None or bwutil.value is None:
         return None
-    return extract_bw(bwutil)
+    return float(bwutil.value)
 
 
 def get_axis_name(axis: dict) -> str:
@@ -189,9 +223,16 @@
     return name
 
 
-class SubBenchState:
+class SubBenchmarkState:
+    """Result data for one executed state of an NVBench benchmark."""
+
     def __init__(self, state: dict, axes_names: dict, axes_values: dict, json_dir: str):
         self.state_name = state["name"]
+        self.device = state.get("device")
+        self.type_config_index = state.get("type_config_index")
+        self.axis_values = state.get("axis_values") or []
+        self.is_skipped = state.get("is_skipped", False)
+        self.skip_reason = state.get("skip_reason")
         self.summaries = parse_summaries(state)
         self.samples = parse_samples(state, json_dir)
         self.frequencies = parse_frequencies(state, json_dir)
@@ -204,10 +245,10 @@ class SubBenchState:
                 f"sample count ({len(self.samples)}) does not match "
                 f"frequency count ({len(self.frequencies)})"
             )
-        self.bw = parse_bw(state)
+        self.bw = parse_bw(self.summaries)
 
         self.point = {}
-        for axis in state["axis_values"] or []:
+        for axis in self.axis_values:
             axis_name = axis["name"]
             name = axes_names[axis_name]
             value = axes_values[axis_name][axis["value"]]
@@ -234,11 +275,17 @@ class SubBenchState:
         return estimator(self.samples, self.frequencies)
 
 
-class SubBenchResult:
+class SubBenchmarkResult:
+    """Result data for one NVBench benchmark and its executed states."""
+
     def __init__(self, bench: dict, json_dir: str):
+        self.name = bench["name"]
+        self.devices = bench.get("devices") or []
+        self.axes = bench.get("axes") or []
+
        axes_names = {}
         axes_values = {}
-        for axis in bench["axes"] or []:
+        for axis in self.axes:
             short_name = axis["name"]
             full_name = get_axis_name(axis)
             this_axis_values = {}
@@ -252,9 +299,9 @@ class SubBenchResult:
 
         self.states = []
         for state in bench["states"]:
-            if not state["is_skipped"]:
+            if not state.get("is_skipped", False):
                 self.states.append(
-                    SubBenchState(state, axes_names, axes_values, json_dir)
+                    SubBenchmarkState(state, axes_names, axes_values, json_dir)
                 )
 
     def __repr__(self) -> str:
@@ -265,10 +312,10 @@ class SubBenchResult:
 
     def __getitem__(
         self, state_index: int | slice
-    ) -> SubBenchState | list[SubBenchState]:
+    ) -> SubBenchmarkState | list[SubBenchmarkState]:
         return self.states[state_index]
 
-    def __iter__(self) -> Iterator[SubBenchState]:
+    def __iter__(self) -> Iterator[SubBenchmarkState]:
         return iter(self.states)
 
     def centers(
@@ -289,23 +336,39 @@
 
 
 class BenchmarkResult:
-    """Parsed result data from an NVBench JSON output file."""
+    """Container for benchmark result data parsed from NVBench JSON output.
+
+    Instances are created with :meth:`from_json` or :meth:`empty`. Direct
+    construction is intentionally disabled to keep creation paths explicit.
+    """
+
+    _construction_token = object()
 
     def __init__(
         self,
-        *,
-        json_path: str | os.PathLike[str],
-        metadata: Any = None,
+        token=None,
     ):
-        self.metadata = metadata
-        self.subbenches: dict[str, SubBenchResult] = {}
-        self._parse_json(json_path)
+        """Initialize an instance created by a BenchmarkResult class method.
+
+        Users should call :meth:`from_json` or :meth:`empty` instead. The token
+        argument is an implementation detail used to prevent direct
+        construction.
+        """
+        if token is not self._construction_token:
+            raise TypeError(
+                "BenchmarkResult cannot be constructed directly; "
+                "use BenchmarkResult.from_json() or BenchmarkResult.empty()"
+            )
+
+        self.metadata: Any = None
+        self.devices: dict[int, BenchmarkResultDevice] = {}
+        self.subbenches: dict[str, SubBenchmarkResult] = {}
 
     @classmethod
     def empty(cls: type[BenchmarkResultT], *, metadata: Any = None) -> BenchmarkResultT:
-        result = cls.__new__(cls)
+        """Create an empty result container with optional user metadata."""
+        result = cls(cls._construction_token)
         result.metadata = metadata
-        result.subbenches = {}
         return result
 
     @classmethod
@@ -315,14 +378,27 @@ class BenchmarkResult:
         *,
         metadata: Any = None,
     ) -> BenchmarkResultT:
-        return cls(json_path=json_path, metadata=metadata)
+        """Read benchmark result data from an NVBench JSON output file."""
+        result = cls.empty(metadata=metadata)
+        result._parse_json(json_path)
+        return result
 
     def _parse_json(self, json_path: str | os.PathLike[str]) -> None:
+        """Populate this instance from an NVBench JSON output file."""
         json_path = os.fspath(json_path)
         json_dir = os.path.dirname(os.path.abspath(json_path))
-        for bench in read_json(json_path)["benchmarks"]:
+        result_json = read_json(json_path)
+        self.devices = {
+            int(device["id"]): BenchmarkResultDevice(
+                id=int(device["id"]),
+                name=device["name"],
+                data=device,
+            )
+            for device in result_json.get("devices", [])
+        }
+        for bench in result_json["benchmarks"]:
             bench_name: str = bench["name"]
-            self.subbenches[bench_name] = SubBenchResult(bench, json_dir)
+            self.subbenches[bench_name] = SubBenchmarkResult(bench, json_dir)
 
     def __repr__(self) -> str:
         return str(self.__dict__)
@@ -336,16 +412,16 @@ class BenchmarkResult:
     def __contains__(self, subbench_name: object) -> bool:
         return subbench_name in self.subbenches
 
-    def __getitem__(self, subbench_name: str) -> SubBenchResult:
+    def __getitem__(self, subbench_name: str) -> SubBenchmarkResult:
         return self.subbenches[subbench_name]
 
     def keys(self) -> KeysView[str]:
         return self.subbenches.keys()
 
-    def values(self) -> ValuesView[SubBenchResult]:
+    def values(self) -> ValuesView[SubBenchmarkResult]:
         return self.subbenches.values()
 
-    def items(self) -> ItemsView[str, SubBenchResult]:
+    def items(self) -> ItemsView[str, SubBenchmarkResult]:
         return self.subbenches.items()
 
     def centers(
diff --git a/python/pyproject.toml b/python/pyproject.toml
index f7ddf78..6a23ff9 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
 [build-system]
 requires = ["scikit-build-core>=0.10", "setuptools_scm"]
 build-backend = "scikit_build_core.build"
@@ -52,6 +55,7 @@ tools = [
 [project.scripts]
 nvbench-compare = "scripts.nvbench_compare:main"
 nvbench-histogram = "scripts.nvbench_histogram:main"
+nvbench-json-summary = "scripts.nvbench_json_summary:main"
 nvbench-walltime = "scripts.nvbench_walltime:main"
 
 [project.urls]
@@ -85,4 +89,5 @@ fallback_version = "0.0.0"
 [tool.scikit-build.wheel.packages]
 "cuda" = "cuda"
 "cuda/bench" = "cuda/bench"
+"cuda/bench/results" = "cuda/bench/results"
 "scripts" = "scripts"
diff --git a/python/scripts/nvbench_json_summary.py b/python/scripts/nvbench_json_summary.py
new file mode 100644
index 0000000..813b57d
--- /dev/null
+++ b/python/scripts/nvbench_json_summary.py
@@ -0,0 +1,275 @@
+#!/usr/bin/env python
+#
+# Copyright 2026 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 with the LLVM exception
+# (the "License"); you may not use this file except in compliance with
+# the License.
+#
+# You may obtain a copy of the License at
+#
+#     http://llvm.org/foundation/relicensing/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import argparse
+import sys
+from pathlib import Path
+
+from cuda.bench.results import (
+    BenchmarkResult,
+    BenchmarkResultSummary,
+    SubBenchmarkResult,
+    SubBenchmarkState,
+)
+
+
+class MarkdownTable:
+    def __init__(self):
+        self.columns = []
+
+    def add_cell(self, row: int, key: str, header: str, value: str) -> None:
+        column = next((col for col in self.columns if col["key"] == key), None)
+        if column is None:
+            column = {
+                "key": key,
+                "header": header,
+                "rows": [],
+                "max_width": len(header),
+            }
+            self.columns.append(column)
+
+        column["max_width"] = max(column["max_width"], len(value))
+        while len(column["rows"]) <= row:
+            column["rows"].append("")
+        column["rows"][row] = value
+
+    def to_string(self) -> str:
+        if not self.columns:
+            return ""
+
+        num_rows = max(len(column["rows"]) for column in self.columns)
+        for column in self.columns:
+            while len(column["rows"]) < num_rows:
+                column["rows"].append("")
+
+        header = "|"
+        divider = "|"
+        for column in self.columns:
+            width = column["max_width"]
+            header += f" {column['header']:^{width}} |"
+            divider += f"{'':-^{width + 2}}|"
+
+        rows = []
+        for row in range(num_rows):
+            row_text = "|"
+            for column in self.columns:
+                row_text += f" {column['rows'][row]:>{column['max_width']}} |"
+            rows.append(row_text)
+
+        return "\n".join([header, divider, *rows]) + "\n"
+
+
+def format_default(summary: BenchmarkResultSummary) -> str:
+    value = summary.value
+    if isinstance(value, float):
+        return f"{value:.5g}"
+    if value is None:
+        return ""
+    return str(value)
+
+
+def format_duration(summary: BenchmarkResultSummary) -> str:
+    seconds = float(summary["value"])
+    if seconds >= 1.0:
+        return f"{seconds:0.3f} s"
+    if seconds >= 1e-3:
+        return f"{seconds * 1e3:0.3f} ms"
+    if seconds >= 1e-6:
+        return f"{seconds * 1e6:0.3f} us"
+    return f"{seconds * 1e9:0.3f} ns"
+
+
+def format_item_rate(summary: BenchmarkResultSummary) -> str:
+    items_per_second = float(summary["value"])
+    if items_per_second >= 1e15:
+        return f"{items_per_second * 1e-15:0.3f}P"
+    if items_per_second >= 1e12:
+        return f"{items_per_second * 1e-12:0.3f}T"
+    if items_per_second >= 1e9:
+        return f"{items_per_second * 1e-9:0.3f}G"
+    if items_per_second >= 1e6:
+        return f"{items_per_second * 1e-6:0.3f}M"
+    if items_per_second >= 1e3:
+        return f"{items_per_second * 1e-3:0.3f}K"
+    return f"{items_per_second:0.3f}"
+
+
+def format_frequency(summary: BenchmarkResultSummary) -> str:
+    frequency_hz = float(summary["value"])
+    if frequency_hz >= 1e9:
+        return f"{frequency_hz * 1e-9:0.3f} GHz"
+    if frequency_hz >= 1e6:
+        return f"{frequency_hz * 1e-6:0.3f} MHz"
+    if frequency_hz >= 1e3:
+        return f"{frequency_hz * 1e-3:0.3f} KHz"
+    return f"{frequency_hz:0.3f} Hz"
+
+
+def format_bytes(summary: BenchmarkResultSummary) -> str:
+    nbytes = float(summary["value"])
+    if nbytes >= 1024.0 * 1024.0 * 1024.0:
+        return f"{nbytes / (1024.0 * 1024.0 * 1024.0):0.3f} GiB"
+    if nbytes >= 1024.0 * 1024.0:
+        return f"{nbytes / (1024.0 * 1024.0):0.3f} MiB"
+    if nbytes >= 1024.0:
+        return f"{nbytes / 1024.0:0.3f} KiB"
+    return f"{nbytes:0.3f} B"
+
+
+def format_byte_rate(summary: BenchmarkResultSummary) -> str:
+    bytes_per_second = float(summary["value"])
+    if bytes_per_second >= 1e15:
+        return f"{bytes_per_second * 1e-15:0.3f} PB/s"
+    if bytes_per_second >= 1e12:
+        return f"{bytes_per_second * 1e-12:0.3f} TB/s"
+    if bytes_per_second >= 1e9:
+        return f"{bytes_per_second * 1e-9:0.3f} GB/s"
+    if bytes_per_second >= 1e6:
+        return f"{bytes_per_second * 1e-6:0.3f} MB/s"
+    if bytes_per_second >= 1e3:
+        return f"{bytes_per_second * 1e-3:0.3f} KB/s"
+    return f"{bytes_per_second:0.3f} B/s"
+
+
+def format_sample_size(summary: BenchmarkResultSummary) -> str:
+    return f"{int(summary['value'])}x"
+
+
+def format_percentage(summary: BenchmarkResultSummary) -> str:
+    return f"{float(summary['value']) * 100.0:.2f}%"
+
+
+def format_summary(summary: BenchmarkResultSummary) -> str:
+    if summary.hint == "duration":
+        return format_duration(summary)
+    if summary.hint == "item_rate":
+        return format_item_rate(summary)
+    if summary.hint == "frequency":
+        return format_frequency(summary)
+    if summary.hint == "bytes":
+        return format_bytes(summary)
+    if summary.hint == "byte_rate":
+        return format_byte_rate(summary)
+    if summary.hint == "sample_size":
+        return format_sample_size(summary)
+    if summary.hint == "percentage":
+        return format_percentage(summary)
+    return format_default(summary)
+
+
+def format_axis_value(
+    axis_value: dict, axes_by_name: dict[str, dict]
+) -> tuple[str, str]:
+    name = axis_value["name"]
+    axis = axes_by_name.get(name, {})
+    value = axis_value["value"]
+    if axis.get("type") == "int64" and axis.get("flags") == "pow2":
+        int_value = int(value)
+        exponent = int_value.bit_length() - 1
+        return name, f"2^{exponent} = {int_value}"
+
+    value_type = axis_value.get("type", axis.get("type"))
+    if value_type == "int64":
+        return name, str(int(value))
+    if value_type == "float64":
+        return name, f"{float(value):.5g}"
+
+    return name, str(value)
+
+
+def add_state_row(
+    table: MarkdownTable,
+    row: int,
+    state: SubBenchmarkState,
+    bench: SubBenchmarkResult,
+) -> None:
+    axes_by_name = {axis["name"]: axis for axis in bench.axes}
+
+    for axis_value in state.axis_values:
+        header, value = format_axis_value(axis_value, axes_by_name)
+        table.add_cell(row, f"axis:{header}", header, value)
+
+    for summary in state.summaries.values():
+        if summary.hide is not None:
+            continue
+        header = summary.name if summary.name is not None else summary.tag
+        table.add_cell(row, summary.tag, header, format_summary(summary))
+
+
+def format_benchmark(result: BenchmarkResult, bench: SubBenchmarkResult) -> str:
+    parts = [f"## {bench.name}\n\n"]
+    device_ids: list[int | None] = list(bench.devices) if bench.devices else [None]
+
+    for device_id in device_ids:
+        if device_id is not None:
+            device = result.devices.get(device_id)
+            device_name = device.name if device is not None else f"Device {device_id}"
+            parts.append(f"### [{device_id}] {device_name}\n\n")
+
+        table = MarkdownTable()
+        row = 0
+        for state in bench.states:
+            if device_id is not None and state.device != device_id:
+                continue
+            add_state_row(table, row, state, bench)
+            row += 1
+
+        table_text = table.to_string()
+        parts.append(table_text if table_text else "No data -- check log.\n")
+
+    return "".join(parts)
+
+
+def format_result(result: BenchmarkResult) -> str:
+    parts = ["# Benchmark Results\n"]
+    for bench in result.values():
+        parts.append(f"\n{format_benchmark(result, bench)}")
+    return "".join(parts)
+
+
+def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        prog="nvbench-json-summary",
+        description="Print an NVBench-style markdown summary table from NVBench JSON output.",
+    )
+    parser.add_argument("json_path", help="Path to an NVBench JSON output file.")
+    parser.add_argument(
+        "-o",
+        "--output",
+        type=Path,
+        help="Write markdown output to this file instead of stdout.",
+    )
+    return parser.parse_args(argv)
+
+
+def main(argv: list[str] | None = None) -> int:
+    args = parse_args(argv)
+    result = BenchmarkResult.from_json(args.json_path)
+    report = format_result(result)
+
+    if args.output is not None:
+        args.output.write_text(report, encoding="utf-8")
+    else:
+        print(report)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/python/test/test_bench_result.py b/python/test/test_benchmark_result.py
similarity index 89%
rename from python/test/test_bench_result.py
rename to python/test/test_benchmark_result.py
index a01b413..944e3e2 100644
--- a/python/test/test_bench_result.py
+++ b/python/test/test_benchmark_result.py
@@ -1,8 +1,25 @@
+# Copyright 2026 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 with the LLVM exception
+# (the "License"); you may not use this file except in compliance with
+# the License.
+#
+# You may obtain a copy of the License at
+#
+#     http://llvm.org/foundation/relicensing/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import json
 import struct
 from dataclasses import dataclass
 
-import cuda.bench as bench
+import cuda.bench
+import cuda.bench.results as results
 import pytest
@@ -63,6 +80,9 @@ def test_benchmark_result_reads_jsonbin_relative_to_json_path(tmp_path):
                     },
                     {
                         "tag": "nv/cold/bw/global/utilization",
+                        "name": "BWUtil",
+                        "hint": "percentage",
+                        "description": "Global memory utilization",
                         "data": [
                             {
                                 "name": "value",
@@ -98,10 +118,12 @@ def test_benchmark_result_reads_jsonbin_relative_to_json_path(tmp_path):
     )
 
     metadata = {"returncode": 0, "elapsed_seconds": 0.25}
-    default_result = bench.BenchmarkResult.from_json(json_fn)
-    result = bench.BenchmarkResult(json_path=json_fn, metadata=metadata)
+    default_result = results.BenchmarkResult.from_json(json_fn)
+    result = results.BenchmarkResult.from_json(json_fn, metadata=metadata)
 
-    assert bench.BenchmarkResult.__module__ == bench.__name__
+    assert results.BenchmarkResult.__module__ == results.__name__
+    assert results.BenchmarkResultSummary.__module__ == results.__name__
+    assert not hasattr(cuda.bench, "BenchmarkResult")
     assert default_result.metadata is None
     assert result.metadata is metadata
     subbench = result["copy"]
@@ -121,12 +143,19 @@ def test_benchmark_result_reads_jsonbin_relative_to_json_path(tmp_path):
         subbench[1]
     assert state.name() == "BlockSize[pow2]=8"
     assert state.bw == 0.75
-    assert state.summaries["nv/cold/bw/global/utilization"] == pytest.approx(0.75)
-    assert state.summaries["nv/json/bin:nv/cold/sample_times"] == {
+    bw_summary = state.summaries["nv/cold/bw/global/utilization"]
+    assert bw_summary.tag == "nv/cold/bw/global/utilization"
+    assert bw_summary.name == "BWUtil"
+    assert bw_summary.hint == "percentage"
+    assert bw_summary.hide is None
+    assert bw_summary.description == "Global memory utilization"
+    assert bw_summary.value == pytest.approx(0.75)
+    assert bw_summary["value"] == pytest.approx(0.75)
+    assert state.summaries["nv/json/bin:nv/cold/sample_times"].data == {
         "filename": "result.json-bin/0.bin",
         "size": 3,
     }
-    assert state.summaries["nv/json/freqs-bin:nv/cold/sample_freqs"] == {
+    assert state.summaries["nv/json/freqs-bin:nv/cold/sample_freqs"].data == {
         "filename": "result.json-freqs-bin/0.bin",
         "size": 3,
     }
@@ -154,13 +183,15 @@ def test_benchmark_result_reads_jsonbin_relative_to_json_path(tmp_path):
         result["missing"]
 
 
-def test_benchmark_result_json_path_is_required_keyword():
+def test_benchmark_result_constructor_is_private():
+    with pytest.raises(TypeError, match="from_json\\(\\).*empty\\(\\)"):
+        results.BenchmarkResult()
+    with pytest.raises(TypeError, match="from_json\\(\\).*empty\\(\\)"):
+        results.BenchmarkResult("result.json")
     with pytest.raises(TypeError):
-        bench.BenchmarkResult("result.json")
+        results.BenchmarkResult(metadata=None)
     with pytest.raises(TypeError):
-        bench.BenchmarkResult(metadata=None)
-    with pytest.raises(TypeError):
-        bench.BenchmarkResult(json_path="result.json", parse=False)
+        results.BenchmarkResult(json_path="result.json", parse=False)
@@ -172,15 +203,15 @@ def test_benchmark_result_empty_does_not_read_json(tmp_path):
     metadata = RunMetadata(returncode=1, elapsed_seconds=0.25)
     missing_json = tmp_path / "missing.json"
 
-    result = bench.BenchmarkResult.empty(metadata=metadata)
+    result = results.BenchmarkResult.empty(metadata=metadata)
 
     assert result.metadata is metadata
     assert result.subbenches == {}
 
     with pytest.raises(FileNotFoundError):
-        bench.BenchmarkResult(json_path=missing_json, metadata=metadata)
+        results.BenchmarkResult.from_json(missing_json, metadata=metadata)
     with pytest.raises(FileNotFoundError):
-        bench.BenchmarkResult.from_json(json_path=missing_json, metadata=metadata)
+        results.BenchmarkResult.from_json(json_path=missing_json, metadata=metadata)


 def test_benchmark_result_accepts_no_axis_benchmark_with_recorded_binary_path(
@@ -251,7 +282,7 @@
 
     monkeypatch.chdir(tmp_path)
 
-    result = bench.BenchmarkResult(json_path="temp_data/axes_run1.json")
+    result = results.BenchmarkResult.from_json("temp_data/axes_run1.json")
 
     state = result.subbenches["simple"].states[0]
     assert state.name() == "Device=0"
@@ -263,7 +294,7 @@
 
 
 def test_benchmark_result_accepts_axis_value_input_string():
-    result = bench.SubBenchResult(
+    result = results.SubBenchmarkResult(
         {
             "name": "single_float64_axis",
             "axes": [
@@ -304,7 +335,7 @@


 def test_benchmark_result_ignores_skipped_state_with_no_summaries():
-    result = bench.SubBenchResult(
+    result = results.SubBenchmarkResult(
         {
             "name": "copy_sweep_grid_shape",
             "axes": [
@@ -451,7 +482,7 @@
         encoding="utf-8",
     )
 
-    result = bench.BenchmarkResult(json_path=json_fn)
+    result = results.BenchmarkResult.from_json(json_fn)
 
     states = result.subbenches["copy"].states
     assert states[0].samples is None
@@ -556,4 +587,4 @@ def test_benchmark_result_rejects_mismatched_sample_and_frequency_counts(tmp_pat
     )
 
     with pytest.raises(ValueError, match="sample count .* frequency count"):
-        bench.BenchmarkResult(json_path=json_fn)
+        results.BenchmarkResult.from_json(json_fn)
diff --git a/python/test/test_nvbench_json_summary.py b/python/test/test_nvbench_json_summary.py
new file mode 100644
index 0000000..f8b3340
--- /dev/null
+++ b/python/test/test_nvbench_json_summary.py
@@ -0,0 +1,221 @@
+# Copyright 2026 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 with the LLVM exception
+# (the "License"); you may not use this file except in compliance with
+# the License.
+#
+# You may obtain a copy of the License at
+#
+#     http://llvm.org/foundation/relicensing/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import importlib.util
+import json
+from pathlib import Path
+
+
+def load_nvbench_json_summary():
+    module_path = (
+        Path(__file__).resolve().parents[1] / "scripts" / "nvbench_json_summary.py"
+    )
+    spec = importlib.util.spec_from_file_location("nvbench_json_summary", module_path)
+    assert spec is not None
+    assert spec.loader is not None
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+nvbench_json_summary = load_nvbench_json_summary()
+
+
+def write_result_json(path):
+    path.write_text(
+        json.dumps(
+            {
+                "devices": [
+                    {
+                        "id": 0,
+                        "name": "Test GPU",
+                    }
+                ],
+                "benchmarks": [
+                    {
+                        "name": "copy",
+                        "devices": [0],
+                        "axes": [
+                            {
+                                "name": "BlockSize",
+                                "type": "int64",
+                                "flags": "pow2",
+                                "values": [
+                                    {
+                                        "input_string": "8",
+                                        "description": "2^8 = 256",
+                                        "value": 256,
+                                    }
+                                ],
+                            }
+                        ],
+                        "states": [
+                            {
+                                "name": "Device=0 BlockSize=2^8",
+                                "device": 0,
+                                "type_config_index": 0,
+                                "axis_values": [
+                                    {
+                                        "name": "BlockSize",
+                                        "type": "int64",
+                                        "value": "256",
+                                    }
+                                ],
+                                "summaries": [
+                                    {
+                                        "tag": "nv/cold/time/gpu/sample_size",
+                                        "name": "Samples",
+                                        "hint": "sample_size",
+                                        "data": [
+                                            {
+                                                "name": "value",
+                                                "type": "int64",
+                                                "value": "12",
+                                            }
+                                        ],
+                                    },
+                                    {
+                                        "tag": "nv/cold/time/gpu/mean",
+                                        "name": "GPU Time",
+                                        "hint": "duration",
+                                        "data": [
+                                            {
+                                                "name": "value",
+                                                "type": "float64",
+                                                "value": "1.25e-6",
+                                            }
+                                        ],
+                                    },
+                                    {
+                                        "tag": "nv/cold/time/gpu/stdev/relative",
+                                        "name": "Noise",
+                                        "hint": "percentage",
+                                        "data": [
+                                            {
+                                                "name": "value",
+                                                "type": "float64",
+                                                "value": "0.015",
+                                            }
+                                        ],
+                                    },
+                                    {
+                                        "tag": "nv/cold/bw/global/bytes_per_second",
+                                        "name": "GlobalMem BW",
+                                        "hint": "byte_rate",
+                                        "data": [
+                                            {
+                                                "name": "value",
+                                                "type": "float64",
+                                                "value": "2.5e9",
+                                            }
+                                        ],
+                                    },
+                                    {
+                                        "tag": "nv/cold/bw/global/utilization",
+                                        "name": "BWUtil",
+                                        "hint": "percentage",
+                                        "data": [
+                                            {
+                                                "name": "value",
+                                                "type": "float64",
+                                                "value": "0.625",
+                                            }
+                                        ],
+                                    },
+                                    {
+                                        "tag": "nv/cold/time/gpu/min",
+                                        "name": "Min GPU Time",
+                                        "hint": "duration",
+                                        "hide": "Hidden by default.",
+                                        "data": [
+                                            {
+                                                "name": "value",
+                                                "type": "float64",
+                                                "value": "1.0e-6",
+                                            }
+                                        ],
+                                    },
+                                ],
+                                "is_skipped": False,
+                            }
+                        ],
+                    }
+                ],
+            }
+        ),
+        encoding="utf-8",
+    )
+
+
+def test_json_summary_formats_nvbench_style_markdown(tmp_path):
+    json_path = tmp_path / "result.json"
+    write_result_json(json_path)
+
+    result = nvbench_json_summary.BenchmarkResult.from_json(json_path)
+    report = nvbench_json_summary.format_result(result)
+
+    assert "# Benchmark Results" in report
+    assert "## copy" in report
+    assert "### [0] Test GPU" in report
+    assert (
+        "| BlockSize | Samples | GPU Time | Noise | GlobalMem BW | BWUtil |" in report
+    )
+    assert (
+        "| 2^8 = 256 |     12x | 1.250 us | 1.50% |   2.500 GB/s | 62.50% |" in report
+    )
+    assert "Min GPU Time" not in report
+
+
+def test_json_summary_formats_axis_values_like_markdown_printer():
+    axes_by_name = {
+        "BlockSize": {
+            "name": "BlockSize",
+            "type": "int64",
+            "flags": "pow2",
+        },
+        "NumBlocks": {
+            "name": "NumBlocks",
+            "type": "int64",
+            "flags": "",
+        },
+        "Duration": {
+            "name": "Duration",
+            "type": "float64",
+            "flags": "",
+        },
+    }
+
+    assert nvbench_json_summary.format_axis_value(
+        {"name": "BlockSize", "type": "int64", "value": "256"}, axes_by_name
+    ) == ("BlockSize", "2^8 = 256")
+    assert nvbench_json_summary.format_axis_value(
+        {"name": "NumBlocks", "type": "int64", "value": "64"}, axes_by_name
+    ) == ("NumBlocks", "64")
+    assert nvbench_json_summary.format_axis_value(
+        {"name": "Duration", "type": "float64", "value": "0.123456789"},
+        axes_by_name,
+    ) == ("Duration", "0.12346")
+
+
+def test_json_summary_cli_writes_output_file(tmp_path):
+    json_path = tmp_path / "result.json"
+    output_path = tmp_path / "summary.md"
+    write_result_json(json_path)
+
+    rc = nvbench_json_summary.main([str(json_path), "--output", str(output_path)])
+
+    assert rc == 0
+    assert "GlobalMem BW" in output_path.read_text(encoding="utf-8")