Addressed both issues raised in review

Null summary values are now represented in the result as None; malformed numeric values raise a descriptive ValueError.

Skipped benchmark states are no longer dropped: they remain present in the
BenchmarkResult data, but they are not reflected in the summary table,
in line with how NVBench-instrumented benchmarks behave.
This commit is contained in:
Oleksandr Pavlyk
2026-05-13 11:42:40 -05:00
parent a38bf890f0
commit dd683850f4
5 changed files with 266 additions and 21 deletions

View File

@@ -8,7 +8,7 @@ from typing import Any, TypeVar, overload
ResultT = TypeVar("ResultT")
BenchmarkResultT = TypeVar("BenchmarkResultT", bound="BenchmarkResult")
_SummaryValue = int | float | str
_SummaryValue = int | float | str | None
class BenchmarkResultDevice:
id: int

View File

@@ -21,7 +21,7 @@ __all__ = [
ResultT = TypeVar("ResultT")
BenchmarkResultT = TypeVar("BenchmarkResultT", bound="BenchmarkResult")
_SummaryValue = int | float | str
_SummaryValue = int | float | str | None
@dataclass(frozen=True)
@@ -79,16 +79,49 @@ def extract_size(summary: dict) -> int:
) from e
def parse_summary_value(value_data: dict) -> _SummaryValue:
value_type = value_data["type"]
def parse_summary_value(
value_data: dict,
*,
summary_tag: str,
field_name: str,
) -> _SummaryValue:
value_type = value_data.get("type")
if "value" not in value_data:
raise ValueError(
f"summary {summary_tag!r} field {field_name!r} is missing value"
)
value = value_data["value"]
if value is None:
return None
if value_type == "int64":
return int(value)
try:
return int(value)
except (TypeError, ValueError) as e:
raise ValueError(
f"summary {summary_tag!r} field {field_name!r} value {value!r} "
"is not an int64"
) from e
if value_type == "float64":
return float(value)
try:
return float(value)
except (TypeError, ValueError) as e:
raise ValueError(
f"summary {summary_tag!r} field {field_name!r} value {value!r} "
"is not a float64"
) from e
if value_type == "string":
if not isinstance(value, str):
raise ValueError(
f"summary {summary_tag!r} field {field_name!r} value {value!r} "
"is not a string"
)
return value
raise ValueError(f"unsupported summary value type: {value_type}")
raise ValueError(
f"summary {summary_tag!r} field {field_name!r} has unsupported "
f"value type {value_type!r}"
)
@dataclass(frozen=True)
@@ -116,12 +149,22 @@ class BenchmarkResultSummary:
def parse_summary(summary: dict) -> BenchmarkResultSummary:
data = {
value_data["name"]: parse_summary_value(value_data)
for value_data in summary.get("data", [])
}
summary_tag = summary["tag"]
data = {}
for value_data in summary.get("data", []):
field_name = value_data.get("name")
if not isinstance(field_name, str):
raise ValueError(
f"summary {summary_tag!r} has a data entry with a missing "
"or non-string name"
)
data[field_name] = parse_summary_value(
value_data,
summary_tag=summary_tag,
field_name=field_name,
)
return BenchmarkResultSummary(
tag=summary["tag"],
tag=summary_tag,
name=summary.get("name"),
hint=summary.get("hint"),
hide=summary.get("hide"),
@@ -324,12 +367,10 @@ class SubBenchmarkResult:
axes_names[short_name] = full_name
axes_values[short_name] = this_axis_values
self.states = []
for state in bench["states"]:
if not state.get("is_skipped", False):
self.states.append(
SubBenchmarkState(state, axes_names, axes_values, json_dir)
)
self.states = [
SubBenchmarkState(state, axes_names, axes_values, json_dir)
for state in bench["states"]
]
def __repr__(self) -> str:
return str(self.__dict__)

View File

@@ -143,6 +143,8 @@ def format_percentage(summary: BenchmarkResultSummary) -> str:
def format_summary(summary: BenchmarkResultSummary) -> str:
if summary.value is None:
return ""
if summary.hint == "duration":
return format_duration(summary)
if summary.hint == "item_rate":
@@ -215,6 +217,8 @@ def format_benchmark(result: BenchmarkResult, bench: SubBenchmarkResult) -> str:
table = MarkdownTable()
row = 0
for state in bench.states:
if state.is_skipped:
continue
if device_id is not None and state.device != device_id:
continue
add_state_row(table, row, state, bench)

View File

@@ -207,6 +207,94 @@ def test_state_stores_rich_summary_metadata(sample_state):
}
def test_state_preserves_null_summary_values(tmp_path):
    """A float64 summary field whose value is null must parse to None."""
    result_path = tmp_path / "result.json"
    noise_summary = {
        "tag": "nv/cold/time/gpu/stdev/relative",
        "name": "Noise",
        "hint": "percentage",
        "data": [
            {
                "name": "value",
                "type": "float64",
                "value": None,
            }
        ],
    }
    state = {
        "name": "Device=0",
        "axis_values": [],
        "summaries": [noise_summary],
        "is_skipped": False,
    }
    payload = {
        "benchmarks": [
            {
                "name": "copy",
                "axes": [],
                "states": [state],
            }
        ]
    }
    write_json(result_path, payload)
    parsed = results.BenchmarkResult.from_json(result_path)
    summary = parsed["copy"][0].summaries["nv/cold/time/gpu/stdev/relative"]
    # Both the attribute-style and mapping-style accessors expose the null.
    assert summary.value is None
    assert summary["value"] is None
def test_state_reports_malformed_numeric_summary_values(tmp_path):
    """A non-numeric string in a float64 field raises a descriptive ValueError."""
    result_path = tmp_path / "result.json"
    gpu_time_summary = {
        "tag": "nv/cold/time/gpu/mean",
        "name": "GPU Time",
        "hint": "duration",
        "data": [
            {
                "name": "value",
                "type": "float64",
                "value": "not-a-number",
            }
        ],
    }
    state = {
        "name": "Device=0",
        "axis_values": [],
        "summaries": [gpu_time_summary],
        "is_skipped": False,
    }
    write_json(
        result_path,
        {
            "benchmarks": [
                {
                    "name": "copy",
                    "axes": [],
                    "states": [state],
                }
            ]
        },
    )
    # The error message names the summary tag, the field, and the bad value.
    expected_message = (
        "summary 'nv/cold/time/gpu/mean' field 'value' "
        "value 'not-a-number' is not a float64"
    )
    with pytest.raises(ValueError, match=expected_message):
        results.BenchmarkResult.from_json(result_path)
def test_state_loads_samples_and_frequencies(sample_state):
assert sample_state.samples is not None
assert list(sample_state.samples) == pytest.approx([1.0, 2.0, 4.0])
@@ -432,7 +520,7 @@ def test_benchmark_result_normalizes_axis_value_lookup_key():
assert result.states[2].point == {"NumBlocks": "64"}
def test_benchmark_result_ignores_skipped_state_with_no_summaries():
def test_benchmark_result_preserves_skipped_state_with_no_summaries():
result = results.SubBenchmarkResult(
{
"name": "copy_sweep_grid_shape",
@@ -467,8 +555,14 @@ def test_benchmark_result_ignores_skipped_state_with_no_summaries():
"",
)
assert len(result.states) == 1
assert result.states[0].name() == "BlockSize[pow2]=6"
assert len(result.states) == 2
assert result.states[0].name() == "BlockSize[pow2]=8"
assert result.states[0].is_skipped is True
assert result.states[0].summaries == {}
assert result.states[0].samples is None
assert result.states[0].frequencies is None
assert result.states[1].name() == "BlockSize[pow2]=6"
assert result.states[1].is_skipped is False
def test_benchmark_result_uses_empty_summaries_when_field_is_missing():

View File

@@ -167,6 +167,19 @@ def test_json_summary_formats_nvbench_style_markdown(tmp_path):
assert "Min GPU Time" not in report
def test_json_summary_formats_null_summary_value_as_blank():
    """format_summary renders a summary holding a None value as an empty string."""
    null_noise = nvbench_json_summary.BenchmarkResultSummary(
        tag="nv/cold/time/gpu/stdev/relative",
        name="Noise",
        hint="percentage",
        hide=None,
        description=None,
        data={"value": None},
    )
    rendered = nvbench_json_summary.format_summary(null_noise)
    assert rendered == ""
def test_json_summary_formats_axis_values_like_markdown_printer():
axes_by_name = {
"BlockSize": {
@@ -259,6 +272,99 @@ def test_json_summary_formats_state_with_null_axis_values(tmp_path):
assert "| 7x |" in report
def test_json_summary_omits_skipped_states(tmp_path):
    """Skipped states are kept in the data model but left out of the report."""
    block_size_axis = {
        "name": "BlockSize",
        "type": "int64",
        "flags": "pow2",
        "values": [
            {
                "input_string": "8",
                "description": "2^8 = 256",
                "value": 256,
            },
            {
                "input_string": "9",
                "description": "2^9 = 512",
                "value": 512,
            },
        ],
    }
    skipped_state = {
        "name": "Device=0 BlockSize=2^8",
        "device": 0,
        "axis_values": [
            {
                "name": "BlockSize",
                "type": "int64",
                "value": "256",
            }
        ],
        "summaries": None,
        "is_skipped": True,
        "skip_reason": "Deadlock detected",
    }
    measured_state = {
        "name": "Device=0 BlockSize=2^9",
        "device": 0,
        "axis_values": [
            {
                "name": "BlockSize",
                "type": "int64",
                "value": "512",
            }
        ],
        "summaries": [
            {
                "tag": "nv/cold/time/gpu/sample_size",
                "name": "Samples",
                "hint": "sample_size",
                "data": [
                    {
                        "name": "value",
                        "type": "int64",
                        "value": "3",
                    }
                ],
            }
        ],
        "is_skipped": False,
    }
    payload = {
        "devices": [
            {
                "id": 0,
                "name": "Test GPU",
            }
        ],
        "benchmarks": [
            {
                "name": "copy",
                "devices": [0],
                "axes": [block_size_axis],
                "states": [skipped_state, measured_state],
            }
        ],
    }
    json_path = tmp_path / "result.json"
    json_path.write_text(json.dumps(payload), encoding="utf-8")
    result = nvbench_json_summary.BenchmarkResult.from_json(json_path)
    report = nvbench_json_summary.format_result(result)
    # Nothing from the skipped state leaks into the rendered table...
    assert "Skip Reason" not in report
    assert "Deadlock detected" not in report
    assert "2^8 = 256" not in report
    # ...while the measured state is still reported.
    assert "2^9 = 512" in report
    assert "3x" in report
def test_json_summary_cli_writes_output_file(tmp_path):
json_path = tmp_path / "result.json"
output_path = tmp_path / "summary.md"