From 4744d26d26bfd19b2ca5fe3b02124f73ac3abbcd Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Thu, 25 Jun 2026 11:29:34 -0500 Subject: [PATCH] Keep nvbench-compare bulk debug output executable * Define nan and inf in generated --bulk-debug-python scripts so pprint output for non-finite timing values remains valid Python code. Add a regression test that executes the generated script and verifies nan/inf values round-trip. * Sharpen bulk-cycle confirmation gating. Only suppress summary-clock fallback when both reference and compare inputs provide paired, non-empty bulk sample/frequency payloads. Missing or empty bulk files are treated as unavailable evidence and still allow sm_clock_rate/mean fallback, while malformed non-empty payloads continue to produce AMBG. Add regression coverage for missing bulk files falling back to summary-cycle confirmation. These changes resolve automated review feedback --- python/scripts/nvbench_compare.py | 37 ++++++++++++++++--- python/test/test_nvbench_compare.py | 55 ++++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 6 deletions(-) diff --git a/python/scripts/nvbench_compare.py b/python/scripts/nvbench_compare.py index 81f33c1..a505fec 100644 --- a/python/scripts/nvbench_compare.py +++ b/python/scripts/nvbench_compare.py @@ -1026,6 +1026,10 @@ def format_bulk_debug_python(bulk_rows: list[dict[str, Any]]) -> str: "# NVB-BULK-BEGIN\n" "# Generated by nvbench-compare --bulk-debug-python.\n" "import numpy as np\n\n" + "# pprint emits bare nan/inf tokens for non-finite floats.\n" + "# Define them so this generated script remains executable.\n" + 'nan = float("nan")\n' + 'inf = float("inf")\n\n' f"bulk_rows = {pprint.pformat(bulk_rows, sort_dicts=False)}\n\n" "def read_float32(filename, expected_count=None):\n" " if filename is None:\n" @@ -1427,8 +1431,28 @@ def get_bulk_time_and_cycles(timing): return samples, samples * frequencies -def has_bulk_time_or_frequency_source(timing): - return timing.sample_source is not None or timing.frequency_source is not None +def has_material_bulk_source(source): + if source is None: + return False + + if isinstance(source, Float32BinarySource): + if source.count <= 0: + return False + + filename = resolve_binary_filename(source.json_dir, source.filename) + try: + return os.path.getsize(filename) > 0 + except OSError: + return False + + values = getattr(source, "values", None) + return values is not None and len(values) > 0 + + +def has_material_bulk_cycle_sources(timing): + return has_material_bulk_source(timing.sample_source) and has_material_bulk_source( + timing.frequency_source + ) def scale_interval(interval, scale): @@ -1475,10 +1499,13 @@ def confirm_clear_gap_with_clock_rate( def confirm_clear_gap_with_bulk_cycles(status, ref_timing, cmp_timing, thresholds): - has_bulk_sources = has_bulk_time_or_frequency_source( + # Only suppress the summary-clock fallback when both inputs advertise paired, + # non-empty bulk payloads. Missing or empty files are treated as unavailable; + # malformed non-empty payloads become AMBG after the lazy read below. + has_bulk_cycle_sources = has_material_bulk_cycle_sources( ref_timing - ) or has_bulk_time_or_frequency_source(cmp_timing) - if not has_bulk_sources: + ) and has_material_bulk_cycle_sources(cmp_timing) + if not has_bulk_cycle_sources: return None ref_bulk = get_bulk_time_and_cycles(ref_timing) diff --git a/python/test/test_nvbench_compare.py b/python/test/test_nvbench_compare.py index 49736aa..36b9eee 100644 --- a/python/test/test_nvbench_compare.py +++ b/python/test/test_nvbench_compare.py @@ -5,6 +5,7 @@ import importlib.util import math import sys import types +from dataclasses import replace from pathlib import Path import numpy as np @@ -548,6 +549,31 @@ def test_format_bulk_debug_python_loads_arrays(tmp_path, nvbench_compare): assert arrays["reference_frequencies"] is None +def test_format_bulk_debug_python_handles_nonfinite_values(nvbench_compare): + script = nvbench_compare.format_bulk_debug_python( + [ + { + "reference_time": math.nan, + "compare_time": math.inf, + "fractional_difference": -math.inf, + } + ] + ) + namespace = {} + + assert 'nan = float("nan")' in script + assert 'inf = float("inf")' in script + assert "'reference_time': nan" in script + assert "'compare_time': inf" in script + assert "'fractional_difference': -inf" in script + exec(script, namespace) + + row = namespace["bulk_rows"][0] + assert math.isnan(row["reference_time"]) + assert row["compare_time"] == math.inf + assert row["fractional_difference"] == -math.inf + + def test_gpu_timing_data_parses_quartiles_and_sm_clock_rate_mean(nvbench_compare): timing = nvbench_compare.extract_gpu_timing_data( [ @@ -621,7 +647,7 @@ def test_gpu_timing_data_warns_when_lazy_sample_read_fails(tmp_path, nvbench_com assert timing.samples is None -def test_compare_gpu_timings_classifies_common_cases(nvbench_compare): +def test_compare_gpu_timings_classifies_common_cases(tmp_path, nvbench_compare): ref_timing = make_gpu_timing_data(nvbench_compare, mean=1.0, stdev_relative=0.05) undecided = nvbench_compare.compare_gpu_timings( @@ -868,6 +894,33 @@ def test_compare_gpu_timings_classifies_common_cases(nvbench_compare): assert bulk_cycle_shift.status == nvbench_compare.ComparisonStatus.UNDECIDED assert bulk_cycle_shift.reason.code == "bulk_cycle_gap_not_confirmed" + missing_source = nvbench_compare.Float32BinarySource( + count=4, + filename="missing.bin", + json_dir=str(tmp_path), + description="test sample", + ) + missing_bulk_files = nvbench_compare.compare_gpu_timings( + replace( + ref_interval_timing, + sample_source=missing_source, + frequency_source=missing_source, + ), + make_gpu_timing_data( + nvbench_compare, + minimum=0.8, + first_quartile=0.85, + median=0.9, + third_quartile=0.95, + mean=0.9, + stdev_relative=0.05, + sm_clock_rate_mean=100.0, + ), + ) + assert missing_bulk_files is not None + assert missing_bulk_files.status == nvbench_compare.ComparisonStatus.FAST + assert missing_bulk_files.reason.code == "clear_gap_confirmed_by_summary_cycles" + unusable_bulk_cycles = nvbench_compare.compare_gpu_timings( make_gpu_timing_data( nvbench_compare,