diff --git a/python/cuda/bench/results/__init__.pyi b/python/cuda/bench/results/__init__.pyi
index 2928441..8698229 100644
--- a/python/cuda/bench/results/__init__.pyi
+++ b/python/cuda/bench/results/__init__.pyi
@@ -8,7 +8,7 @@ from typing import Any, TypeVar, overload
 
 ResultT = TypeVar("ResultT")
 BenchmarkResultT = TypeVar("BenchmarkResultT", bound="BenchmarkResult")
-_SummaryValue = int | float | str
+_SummaryValue = int | float | str | None
 
 class BenchmarkResultDevice:
     id: int
diff --git a/python/cuda/bench/results/_benchmark_result.py b/python/cuda/bench/results/_benchmark_result.py
index 6695aba..5b7a49f 100644
--- a/python/cuda/bench/results/_benchmark_result.py
+++ b/python/cuda/bench/results/_benchmark_result.py
@@ -21,7 +21,7 @@ __all__ = [
 
 ResultT = TypeVar("ResultT")
 BenchmarkResultT = TypeVar("BenchmarkResultT", bound="BenchmarkResult")
-_SummaryValue = int | float | str
+_SummaryValue = int | float | str | None
 
 
 @dataclass(frozen=True)
@@ -79,16 +79,49 @@ def extract_size(summary: dict) -> int:
         ) from e
 
 
-def parse_summary_value(value_data: dict) -> _SummaryValue:
-    value_type = value_data["type"]
+def parse_summary_value(
+    value_data: dict,
+    *,
+    summary_tag: str,
+    field_name: str,
+) -> _SummaryValue:
+    value_type = value_data.get("type")
+    if "value" not in value_data:
+        raise ValueError(
+            f"summary {summary_tag!r} field {field_name!r} is missing value"
+        )
+
     value = value_data["value"]
+    if value is None:
+        return None
+
     if value_type == "int64":
-        return int(value)
+        try:
+            return int(value)
+        except (TypeError, ValueError) as e:
+            raise ValueError(
+                f"summary {summary_tag!r} field {field_name!r} value {value!r} "
+                "is not an int64"
+            ) from e
     if value_type == "float64":
-        return float(value)
+        try:
+            return float(value)
+        except (TypeError, ValueError) as e:
+            raise ValueError(
+                f"summary {summary_tag!r} field {field_name!r} value {value!r} "
+                "is not a float64"
+            ) from e
     if value_type == "string":
+        if not isinstance(value, str):
+            raise ValueError(
+                f"summary {summary_tag!r} field {field_name!r} value {value!r} "
+                "is not a string"
+            )
         return value
-    raise ValueError(f"unsupported summary value type: {value_type}")
+    raise ValueError(
+        f"summary {summary_tag!r} field {field_name!r} has unsupported "
+        f"value type {value_type!r}"
+    )
 
 
 @dataclass(frozen=True)
@@ -116,12 +149,22 @@ class BenchmarkResultSummary:
 
 
 def parse_summary(summary: dict) -> BenchmarkResultSummary:
-    data = {
-        value_data["name"]: parse_summary_value(value_data)
-        for value_data in summary.get("data", [])
-    }
+    summary_tag = summary["tag"]
+    data = {}
+    for value_data in summary.get("data", []):
+        field_name = value_data.get("name")
+        if not isinstance(field_name, str):
+            raise ValueError(
+                f"summary {summary_tag!r} has a data entry with a missing "
+                "or non-string name"
+            )
+        data[field_name] = parse_summary_value(
+            value_data,
+            summary_tag=summary_tag,
+            field_name=field_name,
+        )
     return BenchmarkResultSummary(
-        tag=summary["tag"],
+        tag=summary_tag,
         name=summary.get("name"),
         hint=summary.get("hint"),
         hide=summary.get("hide"),
@@ -324,12 +367,10 @@ class SubBenchmarkResult:
             axes_names[short_name] = full_name
             axes_values[short_name] = this_axis_values
 
-        self.states = []
-        for state in bench["states"]:
-            if not state.get("is_skipped", False):
-                self.states.append(
-                    SubBenchmarkState(state, axes_names, axes_values, json_dir)
-                )
+        self.states = [
+            SubBenchmarkState(state, axes_names, axes_values, json_dir)
+            for state in bench["states"]
+        ]
 
     def __repr__(self) -> str:
         return str(self.__dict__)
diff --git a/python/scripts/nvbench_json_summary.py b/python/scripts/nvbench_json_summary.py
index 1478bff..2252b66 100644
--- a/python/scripts/nvbench_json_summary.py
+++ b/python/scripts/nvbench_json_summary.py
@@ -143,6 +143,8 @@ def format_percentage(summary: BenchmarkResultSummary) -> str:
 
 
 def format_summary(summary: BenchmarkResultSummary) -> str:
+    if summary.value is None:
+        return ""
     if summary.hint == "duration":
         return format_duration(summary)
     if summary.hint == "item_rate":
@@ -215,6 +217,8 @@ def format_benchmark(result: BenchmarkResult, bench: SubBenchmarkResult) -> str:
     table = MarkdownTable()
     row = 0
     for state in bench.states:
+        if state.is_skipped:
+            continue
         if device_id is not None and state.device != device_id:
             continue
         add_state_row(table, row, state, bench)
diff --git a/python/test/test_benchmark_result.py b/python/test/test_benchmark_result.py
index 48c361a..e123730 100644
--- a/python/test/test_benchmark_result.py
+++ b/python/test/test_benchmark_result.py
@@ -207,6 +207,94 @@ def test_state_stores_rich_summary_metadata(sample_state):
     }
 
 
+def test_state_preserves_null_summary_values(tmp_path):
+    json_fn = tmp_path / "result.json"
+    write_json(
+        json_fn,
+        {
+            "benchmarks": [
+                {
+                    "name": "copy",
+                    "axes": [],
+                    "states": [
+                        {
+                            "name": "Device=0",
+                            "axis_values": [],
+                            "summaries": [
+                                {
+                                    "tag": "nv/cold/time/gpu/stdev/relative",
+                                    "name": "Noise",
+                                    "hint": "percentage",
+                                    "data": [
+                                        {
+                                            "name": "value",
+                                            "type": "float64",
+                                            "value": None,
+                                        }
+                                    ],
+                                }
+                            ],
+                            "is_skipped": False,
+                        }
+                    ],
+                }
+            ]
+        },
+    )
+
+    summary = results.BenchmarkResult.from_json(json_fn)["copy"][0].summaries[
+        "nv/cold/time/gpu/stdev/relative"
+    ]
+
+    assert summary.value is None
+    assert summary["value"] is None
+
+
+def test_state_reports_malformed_numeric_summary_values(tmp_path):
+    json_fn = tmp_path / "result.json"
+    write_json(
+        json_fn,
+        {
+            "benchmarks": [
+                {
+                    "name": "copy",
+                    "axes": [],
+                    "states": [
+                        {
+                            "name": "Device=0",
+                            "axis_values": [],
+                            "summaries": [
+                                {
+                                    "tag": "nv/cold/time/gpu/mean",
+                                    "name": "GPU Time",
+                                    "hint": "duration",
+                                    "data": [
+                                        {
+                                            "name": "value",
+                                            "type": "float64",
+                                            "value": "not-a-number",
+                                        }
+                                    ],
+                                }
+                            ],
+                            "is_skipped": False,
+                        }
+                    ],
+                }
+            ]
+        },
+    )
+
+    with pytest.raises(
+        ValueError,
+        match=(
+            "summary 'nv/cold/time/gpu/mean' field 'value' "
+            "value 'not-a-number' is not a float64"
+        ),
+    ):
+        results.BenchmarkResult.from_json(json_fn)
+
+
 def test_state_loads_samples_and_frequencies(sample_state):
     assert sample_state.samples is not None
     assert list(sample_state.samples) == pytest.approx([1.0, 2.0, 4.0])
@@ -432,7 +520,7 @@ def test_benchmark_result_normalizes_axis_value_lookup_key():
     assert result.states[2].point == {"NumBlocks": "64"}
 
 
-def test_benchmark_result_ignores_skipped_state_with_no_summaries():
+def test_benchmark_result_preserves_skipped_state_with_no_summaries():
     result = results.SubBenchmarkResult(
         {
             "name": "copy_sweep_grid_shape",
@@ -467,8 +555,14 @@
         "",
     )
 
-    assert len(result.states) == 1
-    assert result.states[0].name() == "BlockSize[pow2]=6"
+    assert len(result.states) == 2
+    assert result.states[0].name() == "BlockSize[pow2]=8"
+    assert result.states[0].is_skipped is True
+    assert result.states[0].summaries == {}
+    assert result.states[0].samples is None
+    assert result.states[0].frequencies is None
+    assert result.states[1].name() == "BlockSize[pow2]=6"
+    assert result.states[1].is_skipped is False
 
 
 def test_benchmark_result_uses_empty_summaries_when_field_is_missing():
diff --git a/python/test/test_nvbench_json_summary.py b/python/test/test_nvbench_json_summary.py
index 0489e5d..3bba73b 100644
--- a/python/test/test_nvbench_json_summary.py
+++ b/python/test/test_nvbench_json_summary.py
@@ -167,6 +167,19 @@ def test_json_summary_formats_nvbench_style_markdown(tmp_path):
     assert "Min GPU Time" not in report
 
 
+def test_json_summary_formats_null_summary_value_as_blank():
+    summary = nvbench_json_summary.BenchmarkResultSummary(
+        tag="nv/cold/time/gpu/stdev/relative",
+        name="Noise",
+        hint="percentage",
+        hide=None,
+        description=None,
+        data={"value": None},
+    )
+
+    assert nvbench_json_summary.format_summary(summary) == ""
+
+
 def test_json_summary_formats_axis_values_like_markdown_printer():
     axes_by_name = {
         "BlockSize": {
@@ -259,6 +272,99 @@ def test_json_summary_formats_state_with_null_axis_values(tmp_path):
     assert "| 7x |" in report
 
 
+def test_json_summary_omits_skipped_states(tmp_path):
+    json_path = tmp_path / "result.json"
+    json_path.write_text(
+        json.dumps(
+            {
+                "devices": [
+                    {
+                        "id": 0,
+                        "name": "Test GPU",
+                    }
+                ],
+                "benchmarks": [
+                    {
+                        "name": "copy",
+                        "devices": [0],
+                        "axes": [
+                            {
+                                "name": "BlockSize",
+                                "type": "int64",
+                                "flags": "pow2",
+                                "values": [
+                                    {
+                                        "input_string": "8",
+                                        "description": "2^8 = 256",
+                                        "value": 256,
+                                    },
+                                    {
+                                        "input_string": "9",
+                                        "description": "2^9 = 512",
+                                        "value": 512,
+                                    },
+                                ],
+                            }
+                        ],
+                        "states": [
+                            {
+                                "name": "Device=0 BlockSize=2^8",
+                                "device": 0,
+                                "axis_values": [
+                                    {
+                                        "name": "BlockSize",
+                                        "type": "int64",
+                                        "value": "256",
+                                    }
+                                ],
+                                "summaries": None,
+                                "is_skipped": True,
+                                "skip_reason": "Deadlock detected",
+                            },
+                            {
+                                "name": "Device=0 BlockSize=2^9",
+                                "device": 0,
+                                "axis_values": [
+                                    {
+                                        "name": "BlockSize",
+                                        "type": "int64",
+                                        "value": "512",
+                                    }
+                                ],
+                                "summaries": [
+                                    {
+                                        "tag": "nv/cold/time/gpu/sample_size",
+                                        "name": "Samples",
+                                        "hint": "sample_size",
+                                        "data": [
+                                            {
+                                                "name": "value",
+                                                "type": "int64",
+                                                "value": "3",
+                                            }
+                                        ],
+                                    }
+                                ],
+                                "is_skipped": False,
+                            },
+                        ],
+                    }
+                ],
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    result = nvbench_json_summary.BenchmarkResult.from_json(json_path)
+    report = nvbench_json_summary.format_result(result)
+
+    assert "Skip Reason" not in report
+    assert "Deadlock detected" not in report
+    assert "2^8 = 256" not in report
+    assert "2^9 = 512" in report
+    assert "3x" in report
+
+
 def test_json_summary_cli_writes_output_file(tmp_path):
     json_path = tmp_path / "result.json"
     output_path = tmp_path / "summary.md"
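---

Usage impact, as an illustrative sketch rather than part of the patch: callers
that previously relied on skipped states being dropped at parse time must now
filter on state.is_skipped themselves (as format_benchmark does above), and
must tolerate summary.value being None instead of expecting a conversion
error. The sketch assumes only the accessors the tests above exercise
(BenchmarkResult.from_json(), indexing by benchmark name and state index,
state.summaries, summary.value); the file name and summary tag are
illustrative placeholders.

    from cuda.bench import results

    # Load the first state of the "copy" benchmark, mirroring the tests.
    state = results.BenchmarkResult.from_json("result.json")["copy"][0]

    # Skipped states now survive parsing; filter them at the call site.
    if not state.is_skipped:
        noise = state.summaries["nv/cold/time/gpu/stdev/relative"]
        # A null summary value is preserved as None rather than raising.
        if noise.value is not None:
            print(f"noise: {noise.value}")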