From dd683850f48c49f38b640ef803061b553d47eccc Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com>
Date: Wed, 13 May 2026 11:42:40 -0500
Subject: [PATCH] Addressed both issues raised in review

Null summary values are now represented in the result as None, while
malformed values raise a ValueError that identifies the offending
summary and field.

Skipped benchmarks are no longer dropped: they remain present in the
BenchmarkResult data, but they are not reflected in the summary table,
in line with what NVBench-instrumented benchmarks do.
---
 python/cuda/bench/results/__init__.pyi       |   2 +-
 .../cuda/bench/results/_benchmark_result.py  |  75 ++++++++++---
 python/scripts/nvbench_json_summary.py       |   4 +
 python/test/test_benchmark_result.py         | 100 ++++++++++++++++-
 python/test/test_nvbench_json_summary.py     | 106 ++++++++++++++++++
 5 files changed, 266 insertions(+), 21 deletions(-)

diff --git a/python/cuda/bench/results/__init__.pyi b/python/cuda/bench/results/__init__.pyi
index 2928441..8698229 100644
--- a/python/cuda/bench/results/__init__.pyi
+++ b/python/cuda/bench/results/__init__.pyi
@@ -8,7 +8,7 @@ from typing import Any, TypeVar, overload
 ResultT = TypeVar("ResultT")
 BenchmarkResultT = TypeVar("BenchmarkResultT", bound="BenchmarkResult")
 
-_SummaryValue = int | float | str
+_SummaryValue = int | float | str | None
 
 class BenchmarkResultDevice:
     id: int
diff --git a/python/cuda/bench/results/_benchmark_result.py b/python/cuda/bench/results/_benchmark_result.py
index 6695aba..5b7a49f 100644
--- a/python/cuda/bench/results/_benchmark_result.py
+++ b/python/cuda/bench/results/_benchmark_result.py
@@ -21,7 +21,7 @@ __all__ = [
 ResultT = TypeVar("ResultT")
 BenchmarkResultT = TypeVar("BenchmarkResultT", bound="BenchmarkResult")
 
-_SummaryValue = int | float | str
+_SummaryValue = int | float | str | None
 
 
 @dataclass(frozen=True)
@@ -79,16 +79,49 @@ def extract_size(summary: dict) -> int:
     ) from e
 
 
-def parse_summary_value(value_data: dict) -> _SummaryValue:
-    value_type = value_data["type"]
+def parse_summary_value(
+    value_data: dict,
+    *,
+    summary_tag: str,
+    field_name: str,
+) -> _SummaryValue:
+    value_type = value_data.get("type")
+    if "value" not in value_data:
+        raise ValueError(
+            f"summary {summary_tag!r} field {field_name!r} is missing value"
+        )
+
     value = value_data["value"]
+    if value is None:
+        return None
+
     if value_type == "int64":
-        return int(value)
+        try:
+            return int(value)
+        except (TypeError, ValueError) as e:
+            raise ValueError(
+                f"summary {summary_tag!r} field {field_name!r} value {value!r} "
+                "is not an int64"
+            ) from e
     if value_type == "float64":
-        return float(value)
+        try:
+            return float(value)
+        except (TypeError, ValueError) as e:
+            raise ValueError(
+                f"summary {summary_tag!r} field {field_name!r} value {value!r} "
+                "is not a float64"
+            ) from e
     if value_type == "string":
+        if not isinstance(value, str):
+            raise ValueError(
+                f"summary {summary_tag!r} field {field_name!r} value {value!r} "
+                "is not a string"
+            )
         return value
-    raise ValueError(f"unsupported summary value type: {value_type}")
+    raise ValueError(
+        f"summary {summary_tag!r} field {field_name!r} has unsupported "
+        f"value type {value_type!r}"
+    )
 
 
 @dataclass(frozen=True)
@@ -116,12 +149,22 @@ class BenchmarkResultSummary:
 
 
 def parse_summary(summary: dict) -> BenchmarkResultSummary:
-    data = {
-        value_data["name"]: parse_summary_value(value_data)
-        for value_data in summary.get("data", [])
-    }
+    summary_tag = summary["tag"]
+    data = {}
+    for value_data in summary.get("data", []):
+        field_name = value_data.get("name")
+        if not isinstance(field_name, str):
+            raise ValueError(
+                f"summary {summary_tag!r} has a data entry with a missing "
+                "or non-string name"
+            )
+        data[field_name] = parse_summary_value(
+            value_data,
+            summary_tag=summary_tag,
+            field_name=field_name,
+        )
     return BenchmarkResultSummary(
-        tag=summary["tag"],
+        tag=summary_tag,
         name=summary.get("name"),
         hint=summary.get("hint"),
         hide=summary.get("hide"),
@@ -324,12 +367,10 @@ class SubBenchmarkResult:
                 axes_names[short_name] = full_name
                 axes_values[short_name] = this_axis_values
 
-        self.states = []
-        for state in bench["states"]:
-            if not state.get("is_skipped", False):
-                self.states.append(
-                    SubBenchmarkState(state, axes_names, axes_values, json_dir)
-                )
+        self.states = [
+            SubBenchmarkState(state, axes_names, axes_values, json_dir)
+            for state in bench["states"]
+        ]
 
     def __repr__(self) -> str:
         return str(self.__dict__)
diff --git a/python/scripts/nvbench_json_summary.py b/python/scripts/nvbench_json_summary.py
index 1478bff..2252b66 100644
--- a/python/scripts/nvbench_json_summary.py
+++ b/python/scripts/nvbench_json_summary.py
@@ -143,6 +143,8 @@ def format_percentage(summary: BenchmarkResultSummary) -> str:
 
 
 def format_summary(summary: BenchmarkResultSummary) -> str:
+    if summary.value is None:
+        return ""
     if summary.hint == "duration":
         return format_duration(summary)
     if summary.hint == "item_rate":
@@ -215,6 +217,8 @@ def format_benchmark(result: BenchmarkResult, bench: SubBenchmarkResult) -> str:
     table = MarkdownTable()
     row = 0
     for state in bench.states:
+        if state.is_skipped:
+            continue
         if device_id is not None and state.device != device_id:
             continue
         add_state_row(table, row, state, bench)
diff --git a/python/test/test_benchmark_result.py b/python/test/test_benchmark_result.py
index 48c361a..e123730 100644
--- a/python/test/test_benchmark_result.py
+++ b/python/test/test_benchmark_result.py
@@ -207,6 +207,94 @@ def test_state_stores_rich_summary_metadata(sample_state):
     }
 
 
+def test_state_preserves_null_summary_values(tmp_path):
+    json_fn = tmp_path / "result.json"
+    write_json(
+        json_fn,
+        {
+            "benchmarks": [
+                {
+                    "name": "copy",
+                    "axes": [],
+                    "states": [
+                        {
+                            "name": "Device=0",
+                            "axis_values": [],
+                            "summaries": [
+                                {
+                                    "tag": "nv/cold/time/gpu/stdev/relative",
+                                    "name": "Noise",
+                                    "hint": "percentage",
+                                    "data": [
+                                        {
+                                            "name": "value",
+                                            "type": "float64",
+                                            "value": None,
+                                        }
+                                    ],
+                                }
+                            ],
+                            "is_skipped": False,
+                        }
+                    ],
+                }
+            ]
+        },
+    )
+
+    summary = results.BenchmarkResult.from_json(json_fn)["copy"][0].summaries[
+        "nv/cold/time/gpu/stdev/relative"
+    ]
+
+    assert summary.value is None
+    assert summary["value"] is None
+
+
+def test_state_reports_malformed_numeric_summary_values(tmp_path):
+    json_fn = tmp_path / "result.json"
+    write_json(
+        json_fn,
+        {
+            "benchmarks": [
+                {
+                    "name": "copy",
+                    "axes": [],
+                    "states": [
+                        {
+                            "name": "Device=0",
+                            "axis_values": [],
+                            "summaries": [
+                                {
+                                    "tag": "nv/cold/time/gpu/mean",
+                                    "name": "GPU Time",
+                                    "hint": "duration",
+                                    "data": [
+                                        {
+                                            "name": "value",
+                                            "type": "float64",
+                                            "value": "not-a-number",
+                                        }
+                                    ],
+                                }
+                            ],
+                            "is_skipped": False,
+                        }
+                    ],
+                }
+            ]
+        },
+    )
+
+    with pytest.raises(
+        ValueError,
+        match=(
+            "summary 'nv/cold/time/gpu/mean' field 'value' "
+            "value 'not-a-number' is not a float64"
+        ),
+    ):
+        results.BenchmarkResult.from_json(json_fn)
+
+
 def test_state_loads_samples_and_frequencies(sample_state):
     assert sample_state.samples is not None
     assert list(sample_state.samples) == pytest.approx([1.0, 2.0, 4.0])
@@ -432,7 +520,7 @@ def test_benchmark_result_normalizes_axis_value_lookup_key():
     assert result.states[2].point == {"NumBlocks": "64"}
 
 
-def test_benchmark_result_ignores_skipped_state_with_no_summaries():
+def test_benchmark_result_preserves_skipped_state_with_no_summaries():
     result = results.SubBenchmarkResult(
         {
             "name": "copy_sweep_grid_shape",
@@ -467,8 +555,14 @@ def test_benchmark_result_ignores_skipped_state_with_no_summaries():
         "",
     )
 
-    assert len(result.states) == 1
-    assert result.states[0].name() == "BlockSize[pow2]=6"
+    assert len(result.states) == 2
+    assert result.states[0].name() == "BlockSize[pow2]=8"
+    assert result.states[0].is_skipped is True
+    assert result.states[0].summaries == {}
+    assert result.states[0].samples is None
+    assert result.states[0].frequencies is None
+    assert result.states[1].name() == "BlockSize[pow2]=6"
+    assert result.states[1].is_skipped is False
 
 
 def test_benchmark_result_uses_empty_summaries_when_field_is_missing():
diff --git a/python/test/test_nvbench_json_summary.py b/python/test/test_nvbench_json_summary.py
index 0489e5d..3bba73b 100644
--- a/python/test/test_nvbench_json_summary.py
+++ b/python/test/test_nvbench_json_summary.py
@@ -167,6 +167,19 @@ def test_json_summary_formats_nvbench_style_markdown(tmp_path):
     assert "Min GPU Time" not in report
 
 
+def test_json_summary_formats_null_summary_value_as_blank():
+    summary = nvbench_json_summary.BenchmarkResultSummary(
+        tag="nv/cold/time/gpu/stdev/relative",
+        name="Noise",
+        hint="percentage",
+        hide=None,
+        description=None,
+        data={"value": None},
+    )
+
+    assert nvbench_json_summary.format_summary(summary) == ""
+
+
 def test_json_summary_formats_axis_values_like_markdown_printer():
     axes_by_name = {
         "BlockSize": {
@@ -259,6 +272,99 @@ def test_json_summary_formats_state_with_null_axis_values(tmp_path):
     assert "| 7x |" in report
 
 
+def test_json_summary_omits_skipped_states(tmp_path):
+    json_path = tmp_path / "result.json"
+    json_path.write_text(
+        json.dumps(
+            {
+                "devices": [
+                    {
+                        "id": 0,
+                        "name": "Test GPU",
+                    }
+                ],
+                "benchmarks": [
+                    {
+                        "name": "copy",
+                        "devices": [0],
+                        "axes": [
+                            {
+                                "name": "BlockSize",
+                                "type": "int64",
+                                "flags": "pow2",
+                                "values": [
+                                    {
+                                        "input_string": "8",
+                                        "description": "2^8 = 256",
+                                        "value": 256,
+                                    },
+                                    {
+                                        "input_string": "9",
+                                        "description": "2^9 = 512",
+                                        "value": 512,
+                                    },
+                                ],
+                            }
+                        ],
+                        "states": [
+                            {
+                                "name": "Device=0 BlockSize=2^8",
+                                "device": 0,
+                                "axis_values": [
+                                    {
+                                        "name": "BlockSize",
+                                        "type": "int64",
+                                        "value": "256",
+                                    }
+                                ],
+                                "summaries": None,
+                                "is_skipped": True,
+                                "skip_reason": "Deadlock detected",
+                            },
+                            {
+                                "name": "Device=0 BlockSize=2^9",
+                                "device": 0,
+                                "axis_values": [
+                                    {
+                                        "name": "BlockSize",
+                                        "type": "int64",
+                                        "value": "512",
+                                    }
+                                ],
+                                "summaries": [
+                                    {
+                                        "tag": "nv/cold/time/gpu/sample_size",
+                                        "name": "Samples",
+                                        "hint": "sample_size",
+                                        "data": [
+                                            {
+                                                "name": "value",
+                                                "type": "int64",
+                                                "value": "3",
+                                            }
+                                        ],
+                                    }
+                                ],
+                                "is_skipped": False,
+                            },
+                        ],
+                    }
+                ],
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    result = nvbench_json_summary.BenchmarkResult.from_json(json_path)
+    report = nvbench_json_summary.format_result(result)
+
+    assert "Skip Reason" not in report
+    assert "Deadlock detected" not in report
+    assert "2^8 = 256" not in report
+    assert "2^9 = 512" in report
+    assert "3x" in report
+
+
 def test_json_summary_cli_writes_output_file(tmp_path):
     json_path = tmp_path / "result.json"
    output_path = tmp_path / "summary.md"
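
Reviewer note, not part of the diff: a minimal sketch of the consumer-visible
behavior after this patch, assuming a results JSON file (here "result.json")
that contains a benchmark named "copy" and the summary tag used in the tests
above; the file name and benchmark name are placeholders.

    from cuda.bench import results

    res = results.BenchmarkResult.from_json("result.json")
    # Skipped states are now retained in the parsed data; only the
    # summary-table printer (nvbench_json_summary) filters them out.
    state = res["copy"][0]
    if not state.is_skipped:
        summary = state.summaries["nv/cold/time/gpu/stdev/relative"]
        # A null JSON value is preserved as None (rendered as a blank
        # cell); a malformed value raises ValueError at load time.
        print(summary.value)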