Addressed both issues raised in review

Null summary values are now represented in the result as None; malformed numeric values raise a descriptive ValueError.

Skipped benchmark states are no longer dropped: they remain present in the
BenchmarkResult data, but they are not reflected in the summary table,
in line with how NVBench-instrumented benchmarks behave.
This commit is contained in:
Oleksandr Pavlyk
2026-05-13 11:42:40 -05:00
parent a38bf890f0
commit dd683850f4
5 changed files with 266 additions and 21 deletions

View File

@@ -8,7 +8,7 @@ from typing import Any, TypeVar, overload
ResultT = TypeVar("ResultT")
BenchmarkResultT = TypeVar("BenchmarkResultT", bound="BenchmarkResult")
_SummaryValue = int | float | str
_SummaryValue = int | float | str | None
class BenchmarkResultDevice:
id: int

View File

@@ -21,7 +21,7 @@ __all__ = [
ResultT = TypeVar("ResultT")
BenchmarkResultT = TypeVar("BenchmarkResultT", bound="BenchmarkResult")
_SummaryValue = int | float | str
_SummaryValue = int | float | str | None
@dataclass(frozen=True)
@@ -79,16 +79,49 @@ def extract_size(summary: dict) -> int:
) from e
def parse_summary_value(value_data: dict) -> _SummaryValue:
value_type = value_data["type"]
def parse_summary_value(
value_data: dict,
*,
summary_tag: str,
field_name: str,
) -> _SummaryValue:
value_type = value_data.get("type")
if "value" not in value_data:
raise ValueError(
f"summary {summary_tag!r} field {field_name!r} is missing value"
)
value = value_data["value"]
if value is None:
return None
if value_type == "int64":
return int(value)
try:
return int(value)
except (TypeError, ValueError) as e:
raise ValueError(
f"summary {summary_tag!r} field {field_name!r} value {value!r} "
"is not an int64"
) from e
if value_type == "float64":
return float(value)
try:
return float(value)
except (TypeError, ValueError) as e:
raise ValueError(
f"summary {summary_tag!r} field {field_name!r} value {value!r} "
"is not a float64"
) from e
if value_type == "string":
if not isinstance(value, str):
raise ValueError(
f"summary {summary_tag!r} field {field_name!r} value {value!r} "
"is not a string"
)
return value
raise ValueError(f"unsupported summary value type: {value_type}")
raise ValueError(
f"summary {summary_tag!r} field {field_name!r} has unsupported "
f"value type {value_type!r}"
)
@dataclass(frozen=True)
@@ -116,12 +149,22 @@ class BenchmarkResultSummary:
def parse_summary(summary: dict) -> BenchmarkResultSummary:
data = {
value_data["name"]: parse_summary_value(value_data)
for value_data in summary.get("data", [])
}
summary_tag = summary["tag"]
data = {}
for value_data in summary.get("data", []):
field_name = value_data.get("name")
if not isinstance(field_name, str):
raise ValueError(
f"summary {summary_tag!r} has a data entry with a missing "
"or non-string name"
)
data[field_name] = parse_summary_value(
value_data,
summary_tag=summary_tag,
field_name=field_name,
)
return BenchmarkResultSummary(
tag=summary["tag"],
tag=summary_tag,
name=summary.get("name"),
hint=summary.get("hint"),
hide=summary.get("hide"),
@@ -324,12 +367,10 @@ class SubBenchmarkResult:
axes_names[short_name] = full_name
axes_values[short_name] = this_axis_values
self.states = []
for state in bench["states"]:
if not state.get("is_skipped", False):
self.states.append(
SubBenchmarkState(state, axes_names, axes_values, json_dir)
)
self.states = [
SubBenchmarkState(state, axes_names, axes_values, json_dir)
for state in bench["states"]
]
def __repr__(self) -> str:
return str(self.__dict__)

View File

@@ -143,6 +143,8 @@ def format_percentage(summary: BenchmarkResultSummary) -> str:
def format_summary(summary: BenchmarkResultSummary) -> str:
if summary.value is None:
return ""
if summary.hint == "duration":
return format_duration(summary)
if summary.hint == "item_rate":
@@ -215,6 +217,8 @@ def format_benchmark(result: BenchmarkResult, bench: SubBenchmarkResult) -> str:
table = MarkdownTable()
row = 0
for state in bench.states:
if state.is_skipped:
continue
if device_id is not None and state.device != device_id:
continue
add_state_row(table, row, state, bench)

View File

@@ -207,6 +207,94 @@ def test_state_stores_rich_summary_metadata(sample_state):
}
def test_state_preserves_null_summary_values(tmp_path):
    """A float64 summary field whose value is null must parse to None."""
    result_path = tmp_path / "result.json"
    noise_summary = {
        "tag": "nv/cold/time/gpu/stdev/relative",
        "name": "Noise",
        "hint": "percentage",
        "data": [
            {
                "name": "value",
                "type": "float64",
                "value": None,
            }
        ],
    }
    state = {
        "name": "Device=0",
        "axis_values": [],
        "summaries": [noise_summary],
        "is_skipped": False,
    }
    payload = {
        "benchmarks": [
            {
                "name": "copy",
                "axes": [],
                "states": [state],
            }
        ]
    }
    write_json(result_path, payload)
    parsed = results.BenchmarkResult.from_json(result_path)
    summary = parsed["copy"][0].summaries["nv/cold/time/gpu/stdev/relative"]
    # Both the attribute-style and mapping-style accessors expose the null.
    assert summary.value is None
    assert summary["value"] is None
def test_state_reports_malformed_numeric_summary_values(tmp_path):
    """A non-numeric string in a float64 field raises a descriptive ValueError."""
    result_path = tmp_path / "result.json"
    gpu_time_summary = {
        "tag": "nv/cold/time/gpu/mean",
        "name": "GPU Time",
        "hint": "duration",
        "data": [
            {
                "name": "value",
                "type": "float64",
                "value": "not-a-number",
            }
        ],
    }
    state = {
        "name": "Device=0",
        "axis_values": [],
        "summaries": [gpu_time_summary],
        "is_skipped": False,
    }
    write_json(
        result_path,
        {
            "benchmarks": [
                {
                    "name": "copy",
                    "axes": [],
                    "states": [state],
                }
            ]
        },
    )
    # The error message names the summary tag, the field, and the bad value.
    expected_message = (
        "summary 'nv/cold/time/gpu/mean' field 'value' "
        "value 'not-a-number' is not a float64"
    )
    with pytest.raises(ValueError, match=expected_message):
        results.BenchmarkResult.from_json(result_path)
def test_state_loads_samples_and_frequencies(sample_state):
assert sample_state.samples is not None
assert list(sample_state.samples) == pytest.approx([1.0, 2.0, 4.0])
@@ -432,7 +520,7 @@ def test_benchmark_result_normalizes_axis_value_lookup_key():
assert result.states[2].point == {"NumBlocks": "64"}
def test_benchmark_result_ignores_skipped_state_with_no_summaries():
def test_benchmark_result_preserves_skipped_state_with_no_summaries():
result = results.SubBenchmarkResult(
{
"name": "copy_sweep_grid_shape",
@@ -467,8 +555,14 @@ def test_benchmark_result_ignores_skipped_state_with_no_summaries():
"",
)
assert len(result.states) == 1
assert result.states[0].name() == "BlockSize[pow2]=6"
assert len(result.states) == 2
assert result.states[0].name() == "BlockSize[pow2]=8"
assert result.states[0].is_skipped is True
assert result.states[0].summaries == {}
assert result.states[0].samples is None
assert result.states[0].frequencies is None
assert result.states[1].name() == "BlockSize[pow2]=6"
assert result.states[1].is_skipped is False
def test_benchmark_result_uses_empty_summaries_when_field_is_missing():

View File

@@ -167,6 +167,19 @@ def test_json_summary_formats_nvbench_style_markdown(tmp_path):
assert "Min GPU Time" not in report
def test_json_summary_formats_null_summary_value_as_blank():
    """format_summary renders a summary holding a None value as an empty string."""
    null_noise = nvbench_json_summary.BenchmarkResultSummary(
        tag="nv/cold/time/gpu/stdev/relative",
        name="Noise",
        hint="percentage",
        hide=None,
        description=None,
        data={"value": None},
    )
    rendered = nvbench_json_summary.format_summary(null_noise)
    assert rendered == ""
def test_json_summary_formats_axis_values_like_markdown_printer():
axes_by_name = {
"BlockSize": {
@@ -259,6 +272,99 @@ def test_json_summary_formats_state_with_null_axis_values(tmp_path):
assert "| 7x |" in report
def test_json_summary_omits_skipped_states(tmp_path):
    """Skipped states are kept in the data model but left out of the report."""
    block_size_axis = {
        "name": "BlockSize",
        "type": "int64",
        "flags": "pow2",
        "values": [
            {
                "input_string": "8",
                "description": "2^8 = 256",
                "value": 256,
            },
            {
                "input_string": "9",
                "description": "2^9 = 512",
                "value": 512,
            },
        ],
    }
    skipped_state = {
        "name": "Device=0 BlockSize=2^8",
        "device": 0,
        "axis_values": [
            {
                "name": "BlockSize",
                "type": "int64",
                "value": "256",
            }
        ],
        "summaries": None,
        "is_skipped": True,
        "skip_reason": "Deadlock detected",
    }
    measured_state = {
        "name": "Device=0 BlockSize=2^9",
        "device": 0,
        "axis_values": [
            {
                "name": "BlockSize",
                "type": "int64",
                "value": "512",
            }
        ],
        "summaries": [
            {
                "tag": "nv/cold/time/gpu/sample_size",
                "name": "Samples",
                "hint": "sample_size",
                "data": [
                    {
                        "name": "value",
                        "type": "int64",
                        "value": "3",
                    }
                ],
            }
        ],
        "is_skipped": False,
    }
    payload = {
        "devices": [
            {
                "id": 0,
                "name": "Test GPU",
            }
        ],
        "benchmarks": [
            {
                "name": "copy",
                "devices": [0],
                "axes": [block_size_axis],
                "states": [skipped_state, measured_state],
            }
        ],
    }
    json_path = tmp_path / "result.json"
    json_path.write_text(json.dumps(payload), encoding="utf-8")
    result = nvbench_json_summary.BenchmarkResult.from_json(json_path)
    report = nvbench_json_summary.format_result(result)
    # Nothing from the skipped state leaks into the rendered table...
    assert "Skip Reason" not in report
    assert "Deadlock detected" not in report
    assert "2^8 = 256" not in report
    # ...while the measured state is still reported.
    assert "2^9 = 512" in report
    assert "3x" in report
def test_json_summary_cli_writes_output_file(tmp_path):
json_path = tmp_path / "result.json"
output_path = tmp_path / "summary.md"