# Source: nvbench/python/test/test_benchmark_result.py
# Commit dd683850f4 (Oleksandr Pavlyk, 2026-05-13 12:35:09 -05:00):
# "Addressed both issues raised in review"
#   - Malformed values are now represented in result as None.
#   - Skipped benchmarks are no longer dropped, i.e., they are present in
#     BenchmarkResult data, but they are not reflected in the summary table,
#     in line with what NVBench-instrumented benchmarks do.
# (751 lines, 24 KiB, Python)

# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
import json
import struct
from dataclasses import dataclass
import cuda.bench
import cuda.bench.results as results
import pytest
def write_json(path, data):
    """Serialize *data* as JSON and write it to *path* using UTF-8."""
    encoded = json.dumps(data)
    path.write_text(encoded, encoding="utf-8")
def block_size_axis(*values):
    """Build an NVBench-style pow2 int64 axis named ``BlockSize``.

    Each element of *values* is a power-of-two exponent; the axis entry
    records both the exponent (as the input string) and 2**exponent.
    """
    entries = []
    for exponent in values:
        entries.append(
            {
                "input_string": str(exponent),
                "description": f"2^{exponent} = {2**exponent}",
                "value": 2**exponent,
            }
        )
    return {
        "name": "BlockSize",
        "type": "int64",
        "flags": "pow2",
        "values": entries,
    }
def sample_file_summary(tag, filename, size):
    """Return a summary entry referencing an on-disk binary sample file.

    The ``size`` field is stringified, matching how NVBench serializes
    int64 values in its JSON output.
    """
    filename_field = {"name": "filename", "type": "string", "value": filename}
    size_field = {"name": "size", "type": "int64", "value": str(size)}
    return {"tag": tag, "data": [filename_field, size_field]}
def sample_times_summary(filename, size):
    """Summary entry tagging *filename* as cold-run sample times."""
    times_tag = "nv/json/bin:nv/cold/sample_times"
    return sample_file_summary(times_tag, filename, size)
def sample_frequencies_summary(filename, size):
    """Summary entry tagging *filename* as cold-run sample frequencies."""
    freqs_tag = "nv/json/freqs-bin:nv/cold/sample_freqs"
    return sample_file_summary(freqs_tag, filename, size)
def bwutil_summary(value):
    """Return a global-memory bandwidth-utilization summary for *value*."""
    value_field = {"name": "value", "type": "float64", "value": str(value)}
    return {
        "tag": "nv/cold/bw/global/utilization",
        "name": "BWUtil",
        "hint": "percentage",
        "description": "Global memory utilization",
        "data": [value_field],
    }
@pytest.fixture
def sample_result_path(tmp_path):
    """Write a minimal NVBench JSON result plus its binary sample files.

    Returns the path to the JSON file.  Sample times and frequencies live
    in sibling ``result.json-bin`` / ``result.json-freqs-bin`` directories,
    mirroring the layout NVBench emits on disk.
    """
    samples_dir = tmp_path / "result.json-bin"
    samples_dir.mkdir()
    (samples_dir / "0.bin").write_bytes(struct.pack("<3f", 1.0, 2.0, 4.0))

    freqs_dir = tmp_path / "result.json-freqs-bin"
    freqs_dir.mkdir()
    (freqs_dir / "0.bin").write_bytes(struct.pack("<3f", 100.0, 200.0, 400.0))

    state = {
        "name": "Device=0 BlockSize=2^8",
        "axis_values": [
            {"name": "BlockSize", "type": "int64", "value": "256"}
        ],
        "summaries": [
            sample_times_summary("result.json-bin/0.bin", 3),
            bwutil_summary(0.75),
            sample_frequencies_summary("result.json-freqs-bin/0.bin", 3),
        ],
        "is_skipped": False,
    }
    benchmark = {
        "name": "copy",
        "axes": [block_size_axis(8)],
        "states": [state],
    }

    json_fn = tmp_path / "result.json"
    write_json(json_fn, {"benchmarks": [benchmark]})
    return json_fn
@pytest.fixture
def sample_result(sample_result_path):
    """Parsed BenchmarkResult for the sample JSON fixture."""
    parsed = results.BenchmarkResult.from_json(sample_result_path)
    return parsed
@pytest.fixture
def sample_subbenchmark(sample_result):
    """The single 'copy' sub-benchmark from the sample result."""
    copy_subbench = sample_result["copy"]
    return copy_subbench
@pytest.fixture
def sample_state(sample_subbenchmark):
    """First (and only) state of the 'copy' sub-benchmark."""
    first_state = sample_subbenchmark[0]
    return first_state
def test_result_classes_are_exposed_from_results_namespace():
    """Result classes live in the results module, not the top-level package."""
    for exposed_class in (results.BenchmarkResult, results.BenchmarkResultSummary):
        assert exposed_class.__module__ == results.__name__
    assert not hasattr(cuda.bench, "BenchmarkResult")
def test_from_json_preserves_optional_metadata(sample_result_path):
    """from_json keeps the metadata object it was given; default is None."""
    metadata = {"returncode": 0, "elapsed_seconds": 0.25}
    without_metadata = results.BenchmarkResult.from_json(sample_result_path)
    assert without_metadata.metadata is None
    with_metadata = results.BenchmarkResult.from_json(
        sample_result_path, metadata=metadata
    )
    # Identity (not equality): the exact object must be retained.
    assert with_metadata.metadata is metadata
def test_benchmark_result_implements_mapping_protocol(sample_result):
    """BenchmarkResult behaves like a read-only mapping of sub-benchmarks."""
    subbench = sample_result["copy"]
    assert len(sample_result) == 1
    for key_view in (sample_result, sample_result.keys()):
        assert list(key_view) == ["copy"]
    assert list(sample_result.values()) == [subbench]
    assert list(sample_result.items()) == [("copy", subbench)]
    assert "copy" in sample_result
    assert "missing" not in sample_result
    # Indexing returns the very object stored in .subbenches.
    assert subbench is sample_result.subbenches["copy"]
    with pytest.raises(KeyError):
        sample_result["missing"]
def test_subbenchmark_result_implements_sequence_protocol(sample_subbenchmark):
    """SubBenchmarkResult supports len, indexing, slicing and iteration."""
    only_state = sample_subbenchmark[0]
    assert len(sample_subbenchmark) == 1
    # Negative indexing reaches the same object.
    assert sample_subbenchmark[-1] is only_state
    assert sample_subbenchmark[:] == sample_subbenchmark.states
    assert [state for state in sample_subbenchmark] == sample_subbenchmark.states
    with pytest.raises(IndexError):
        sample_subbenchmark[1]
def test_state_parses_axis_name_and_bandwidth(sample_state):
    """State exposes a formatted axis name and the parsed bandwidth value."""
    assert sample_state.bw == 0.75
    assert sample_state.name() == "BlockSize[pow2]=8"
def test_state_stores_rich_summary_metadata(sample_state):
    """Summaries keep tag/name/hint/description and expose value and data."""
    summaries = sample_state.summaries
    bw_summary = summaries["nv/cold/bw/global/utilization"]
    assert bw_summary.tag == "nv/cold/bw/global/utilization"
    assert bw_summary.name == "BWUtil"
    assert bw_summary.hint == "percentage"
    assert bw_summary.hide is None
    assert bw_summary.description == "Global memory utilization"
    # Both attribute and mapping access yield the parsed float.
    for extracted in (bw_summary.value, bw_summary["value"]):
        assert extracted == pytest.approx(0.75)
    times = summaries["nv/json/bin:nv/cold/sample_times"]
    assert times.data == {
        "filename": "result.json-bin/0.bin",
        "size": 3,
    }
    freqs = summaries["nv/json/freqs-bin:nv/cold/sample_freqs"]
    assert freqs.data == {
        "filename": "result.json-freqs-bin/0.bin",
        "size": 3,
    }
def test_state_preserves_null_summary_values(tmp_path):
    """A summary value serialized as JSON null must surface as Python None."""
    json_fn = tmp_path / "result.json"
    write_json(
        json_fn,
        {
            "benchmarks": [
                {
                    "name": "copy",
                    "axes": [],
                    "states": [
                        {
                            "name": "Device=0",
                            "axis_values": [],
                            "summaries": [
                                {
                                    "tag": "nv/cold/time/gpu/stdev/relative",
                                    "name": "Noise",
                                    "hint": "percentage",
                                    "data": [
                                        {
                                            "name": "value",
                                            "type": "float64",
                                            # JSON null, not a string.
                                            "value": None,
                                        }
                                    ],
                                }
                            ],
                            "is_skipped": False,
                        }
                    ],
                }
            ]
        },
    )
    summary = results.BenchmarkResult.from_json(json_fn)["copy"][0].summaries[
        "nv/cold/time/gpu/stdev/relative"
    ]
    # None is preserved through both attribute and mapping access.
    assert summary.value is None
    assert summary["value"] is None
def test_state_reports_malformed_numeric_summary_values(tmp_path):
    """A non-numeric string in a float64 field raises a descriptive ValueError."""
    json_fn = tmp_path / "result.json"
    write_json(
        json_fn,
        {
            "benchmarks": [
                {
                    "name": "copy",
                    "axes": [],
                    "states": [
                        {
                            "name": "Device=0",
                            "axis_values": [],
                            "summaries": [
                                {
                                    "tag": "nv/cold/time/gpu/mean",
                                    "name": "GPU Time",
                                    "hint": "duration",
                                    "data": [
                                        {
                                            "name": "value",
                                            "type": "float64",
                                            # Cannot be parsed as a float.
                                            "value": "not-a-number",
                                        }
                                    ],
                                }
                            ],
                            "is_skipped": False,
                        }
                    ],
                }
            ]
        },
    )
    # The error names the offending summary tag, field, and raw value.
    with pytest.raises(
        ValueError,
        match=(
            "summary 'nv/cold/time/gpu/mean' field 'value' "
            "value 'not-a-number' is not a float64"
        ),
    ):
        results.BenchmarkResult.from_json(json_fn)
def test_state_loads_samples_and_frequencies(sample_state):
    """Binary sample-time and frequency files are decoded into sequences."""
    for loaded, expected in (
        (sample_state.samples, [1.0, 2.0, 4.0]),
        (sample_state.frequencies, [100.0, 200.0, 400.0]),
    ):
        assert loaded is not None
        assert list(loaded) == pytest.approx(expected)
def test_centers_apply_estimators_to_samples(sample_result):
    """centers() runs the supplied estimator over each state's samples."""

    def arithmetic_mean(samples):
        return sum(samples) / len(samples)

    centers = sample_result.centers(arithmetic_mean)
    assert centers == {"copy": {"BlockSize[pow2]=8": pytest.approx(7.0 / 3.0)}}
def test_centers_with_frequencies_apply_estimators(sample_result, sample_subbenchmark):
    """centers_with_frequencies feeds both samples and frequencies in."""

    def weighted_mean(samples, frequencies):
        weighted_total = sum(s * f for s, f in zip(samples, frequencies))
        return weighted_total / sum(frequencies)

    weighted_centers = sample_result.centers_with_frequencies(weighted_mean)
    assert weighted_centers == {"copy": {"BlockSize[pow2]=8": pytest.approx(3.0)}}
    # The per-subbenchmark API must agree with the whole-result API.
    per_subbench = sample_subbenchmark.centers_with_frequencies(weighted_mean)
    assert per_subbench == weighted_centers["copy"]
def test_benchmark_result_constructor_is_private():
    """Direct construction is rejected; users must use the factory methods."""
    # Positional construction points the user at from_json()/empty().
    for args in ((), ("result.json",)):
        with pytest.raises(TypeError, match="from_json\\(\\).*empty\\(\\)"):
            results.BenchmarkResult(*args)
    with pytest.raises(TypeError):
        results.BenchmarkResult(metadata=None)
    with pytest.raises(TypeError):
        results.BenchmarkResult(json_path="result.json", parse=False)
def test_benchmark_result_empty_does_not_read_json(tmp_path):
    """empty() builds a result without touching disk; from_json still must."""

    @dataclass
    class RunMetadata:
        returncode: int
        elapsed_seconds: float

    metadata = RunMetadata(returncode=1, elapsed_seconds=0.25)
    empty_result = results.BenchmarkResult.empty(metadata=metadata)
    assert empty_result.metadata is metadata
    assert empty_result.subbenches == {}

    # from_json, positional or keyword, must fail on a nonexistent path.
    missing_json = tmp_path / "missing.json"
    for call in (
        lambda: results.BenchmarkResult.from_json(missing_json, metadata=metadata),
        lambda: results.BenchmarkResult.from_json(
            json_path=missing_json, metadata=metadata
        ),
    ):
        with pytest.raises(FileNotFoundError):
            call()
def test_benchmark_result_accepts_no_axis_benchmark_with_recorded_binary_path(
    tmp_path, monkeypatch
):
    """Null axes parse fine, and recorded binary paths resolve from the cwd."""
    data_dir = tmp_path / "temp_data"
    data_dir.mkdir()
    bin_dir = data_dir / "axes_run1.json-bin"
    bin_dir.mkdir()
    (bin_dir / "0.bin").write_bytes(struct.pack("<2f", 1.0, 4.0))
    freq_bin_dir = data_dir / "axes_run1.json-freqs-bin"
    freq_bin_dir.mkdir()
    (freq_bin_dir / "0.bin").write_bytes(struct.pack("<2f", 100.0, 400.0))
    json_fn = data_dir / "axes_run1.json"
    write_json(
        json_fn,
        {
            "benchmarks": [
                {
                    "name": "simple",
                    # Axis-less benchmarks serialize axes/axis_values as null.
                    "axes": None,
                    "states": [
                        {
                            "name": "Device=0",
                            "axis_values": None,
                            "summaries": [
                                sample_times_summary(
                                    "temp_data/axes_run1.json-bin/0.bin",
                                    2,
                                ),
                                sample_frequencies_summary(
                                    "temp_data/axes_run1.json-freqs-bin/0.bin",
                                    2,
                                ),
                            ],
                            "is_skipped": False,
                        }
                    ],
                }
            ]
        },
    )
    # Parse from a relative path so the recorded binary paths are cwd-relative.
    monkeypatch.chdir(tmp_path)
    result = results.BenchmarkResult.from_json("temp_data/axes_run1.json")
    state = result.subbenches["simple"].states[0]
    assert state.name() == "Device=0"
    assert state.point == {}
    assert state.samples is not None
    assert list(state.samples) == pytest.approx([1.0, 4.0])
    assert state.frequencies is not None
    assert list(state.frequencies) == pytest.approx([100.0, 400.0])
def test_benchmark_result_accepts_axis_value_input_string():
    """A plain float64 axis state resolves its point from the string value."""
    result = results.SubBenchmarkResult(
        {
            "name": "single_float64_axis",
            "axes": [
                {
                    "name": "Duration",
                    "type": "float64",
                    # No flags: name() formats without a [flags] suffix.
                    "flags": "",
                    "values": [
                        {
                            "input_string": "0",
                            "description": "",
                            "value": 0.0,
                        }
                    ],
                }
            ],
            "states": [
                {
                    "name": "Device=0 Duration=0",
                    "axis_values": [
                        {
                            "name": "Duration",
                            "type": "float64",
                            "value": "0",
                        }
                    ],
                    "summaries": [],
                    "is_skipped": False,
                }
            ],
        },
        "",
    )
    state = result.states[0]
    # The Device= prefix is stripped from the state name.
    assert state.name() == "Duration=0"
    assert state.point == {"Duration": "0"}
def test_benchmark_result_normalizes_axis_value_lookup_key(tmp_path=None):
    """Point keys normalize across int, None, and input_string axis values."""
    result = results.SubBenchmarkResult(
        {
            "name": "num_blocks",
            "axes": [
                {
                    "name": "NumBlocks",
                    "type": "int64",
                    "flags": "",
                    "values": [
                        {
                            "input_string": "64",
                            "description": "",
                            "value": 64,
                        },
                        {
                            # A value of None maps back to its input string.
                            "input_string": "default",
                            "description": "",
                            "value": None,
                        },
                    ],
                }
            ],
            "states": [
                {
                    "name": "Device=0 NumBlocks=64",
                    "axis_values": [
                        {
                            "name": "NumBlocks",
                            "type": "int64",
                            # Integer value, not a string.
                            "value": 64,
                        }
                    ],
                    "summaries": [],
                    "is_skipped": False,
                },
                {
                    "name": "Device=0 NumBlocks=default",
                    "axis_values": [
                        {
                            "name": "NumBlocks",
                            "type": "int64",
                            "value": None,
                        }
                    ],
                    "summaries": [],
                    "is_skipped": False,
                },
                {
                    "name": "Device=0 NumBlocks=64",
                    "axis_values": [
                        {
                            "name": "NumBlocks",
                            "type": "int64",
                            # input_string supplied instead of value.
                            "input_string": "64",
                        }
                    ],
                    "summaries": [],
                    "is_skipped": False,
                },
            ],
        },
        "",
    )
    # All three spellings normalize to string-keyed points.
    assert result.states[0].point == {"NumBlocks": "64"}
    assert result.states[1].point == {"NumBlocks": "default"}
    assert result.states[2].point == {"NumBlocks": "64"}
def test_benchmark_result_preserves_skipped_state_with_no_summaries():
    """Skipped states are kept in the data with empty summaries and no samples."""
    result = results.SubBenchmarkResult(
        {
            "name": "copy_sweep_grid_shape",
            "axes": [block_size_axis(6, 8)],
            "states": [
                {
                    "name": "Device=0 BlockSize=2^8",
                    "axis_values": [
                        {
                            "name": "BlockSize",
                            "type": "int64",
                            "value": "256",
                        }
                    ],
                    # Skipped states serialize summaries as null.
                    "summaries": None,
                    "is_skipped": True,
                },
                {
                    "name": "Device=0 BlockSize=2^6",
                    "axis_values": [
                        {
                            "name": "BlockSize",
                            "type": "int64",
                            "value": "64",
                        }
                    ],
                    "summaries": [],
                    "is_skipped": False,
                },
            ],
        },
        "",
    )
    # Both states survive parsing; the skipped one is not dropped.
    assert len(result.states) == 2
    assert result.states[0].name() == "BlockSize[pow2]=8"
    assert result.states[0].is_skipped is True
    assert result.states[0].summaries == {}
    assert result.states[0].samples is None
    assert result.states[0].frequencies is None
    assert result.states[1].name() == "BlockSize[pow2]=6"
    assert result.states[1].is_skipped is False
def test_benchmark_result_uses_empty_summaries_when_field_is_missing():
    """A state missing the 'summaries' key parses with empty/None derived data."""
    result = results.SubBenchmarkResult(
        {
            "name": "copy_sweep_grid_shape",
            "axes": [block_size_axis(8)],
            "states": [
                {
                    "name": "Device=0 BlockSize=2^8",
                    "axis_values": [
                        {
                            "name": "BlockSize",
                            "type": "int64",
                            "value": "256",
                        }
                    ],
                    # Note: no "summaries" key at all.
                    "is_skipped": False,
                },
            ],
        },
        "",
    )
    state = result.states[0]
    assert state.name() == "BlockSize[pow2]=8"
    assert state.summaries == {}
    assert state.samples is None
    assert state.frequencies is None
    assert state.bw is None
@pytest.mark.parametrize(
    "field_name,bad_type,expected_type",
    [
        ("filename", "int64", "string"),
        ("size", "string", "int64"),
    ],
)
def test_benchmark_result_validates_binary_summary_field_types(
    field_name, bad_type, expected_type
):
    """Binary-file summary fields with the wrong declared type raise ValueError."""
    summary = sample_times_summary("result.json-bin/0.bin", 3)
    # Corrupt the targeted field's declared type in place.
    for value_data in summary["data"]:
        if value_data["name"] == field_name:
            value_data["type"] = bad_type
            if field_name == "filename":
                # Make the value plausible for the bad type (an int64 string).
                value_data["value"] = "123"
    with pytest.raises(
        ValueError,
        match=rf"field '{field_name}' has type '{bad_type}'; expected '{expected_type}'",
    ):
        results.SubBenchmarkResult(
            {
                "name": "copy",
                "axes": [],
                "states": [
                    {
                        "name": "Device=0",
                        "axis_values": [],
                        "summaries": [summary],
                        "is_skipped": False,
                    }
                ],
            },
            "",
        )
def test_benchmark_result_uses_none_for_unavailable_samples(tmp_path):
    """Missing binary files yield None samples and None centers (no estimator run)."""
    json_fn = tmp_path / "result.json"
    write_json(
        json_fn,
        {
            "benchmarks": [
                {
                    "name": "copy",
                    "axes": [block_size_axis(8, 9)],
                    "states": [
                        {
                            # State with no sample summaries at all.
                            "name": "Device=0 BlockSize=2^8",
                            "axis_values": [
                                {
                                    "name": "BlockSize",
                                    "type": "int64",
                                    "value": "256",
                                }
                            ],
                            "summaries": [],
                            "is_skipped": False,
                        },
                        {
                            # State whose summaries reference nonexistent files.
                            "name": "Device=0 BlockSize=2^9",
                            "axis_values": [
                                {
                                    "name": "BlockSize",
                                    "type": "int64",
                                    "value": "512",
                                }
                            ],
                            "summaries": [
                                sample_times_summary(
                                    "result.json-bin/missing.bin",
                                    3,
                                ),
                                sample_frequencies_summary(
                                    "result.json-freqs-bin/missing.bin",
                                    3,
                                ),
                            ],
                            "is_skipped": False,
                        },
                    ],
                }
            ]
        },
    )
    result = results.BenchmarkResult.from_json(json_fn)
    states = result.subbenches["copy"].states
    assert states[0].samples is None
    assert states[1].samples is None
    assert states[0].frequencies is None
    assert states[1].frequencies is None
    # Estimators must never be invoked when samples are unavailable.
    assert result.centers(lambda samples: pytest.fail("estimator should not run")) == {
        "copy": {
            "BlockSize[pow2]=8": None,
            "BlockSize[pow2]=9": None,
        }
    }
    assert result.centers_with_frequencies(
        lambda samples, frequencies: pytest.fail("estimator should not run")
    ) == {
        "copy": {
            "BlockSize[pow2]=8": None,
            "BlockSize[pow2]=9": None,
        }
    }
def test_benchmark_result_rejects_mismatched_sample_and_frequency_counts(tmp_path):
    """Three sample times paired with two frequencies must raise ValueError."""
    bin_dir = tmp_path / "result.json-bin"
    bin_dir.mkdir()
    # Three sample times...
    (bin_dir / "0.bin").write_bytes(struct.pack("<3f", 1.0, 2.0, 4.0))
    freq_bin_dir = tmp_path / "result.json-freqs-bin"
    freq_bin_dir.mkdir()
    # ...but only two frequencies.
    (freq_bin_dir / "0.bin").write_bytes(struct.pack("<2f", 100.0, 200.0))
    json_fn = tmp_path / "result.json"
    write_json(
        json_fn,
        {
            "benchmarks": [
                {
                    "name": "copy",
                    "axes": [block_size_axis(8)],
                    "states": [
                        {
                            "name": "Device=0 BlockSize=2^8",
                            "axis_values": [
                                {
                                    "name": "BlockSize",
                                    "type": "int64",
                                    "value": "256",
                                }
                            ],
                            "summaries": [
                                sample_times_summary("result.json-bin/0.bin", 3),
                                sample_frequencies_summary(
                                    "result.json-freqs-bin/0.bin",
                                    2,
                                ),
                            ],
                            "is_skipped": False,
                        }
                    ],
                }
            ]
        },
    )
    with pytest.raises(ValueError, match="sample count .* frequency count"):
        results.BenchmarkResult.from_json(json_fn)