From ea592b6444d0b2461c682d0ba0c67fbc51dfbbe1 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com>
Date: Mon, 4 May 2026 15:58:42 -0500
Subject: [PATCH] Tweaks for nvbench_compare

1. For JSON files that contain repeated measurements of the same
   run-time axis values, make sure that the script compares each
   measurement against the corresponding reference entry.

   Previously, if cmp had two states with the same name and ref
   also had two, we would compare the measurements for each state
   in cmp against the first matching state in ref.

   This change introduces counters tracking how many times each
   particular state name has been seen in cmp, and retrieves the
   entry at the corresponding position among the matching states
   in ref.

Previously, I had

```

|  BlockSize  |  NumBlocks  |   Ref Time |   Ref Noise |   Cmp Time |   Cmp Noise |      Diff |   %Diff |  Status  |
|-------------|-------------|------------|-------------|------------|-------------|-----------|---------|----------|
|     2^8     |     64      |   1.776 ms |       0.46% |   1.777 ms |       0.40% |  1.024 us |   0.06% |   SAME   |
|     2^8     |     64      |   1.776 ms |       0.46% |   1.774 ms |       0.52% | -2.048 us |  -0.12% |   SAME   |
|     2^8     |     64      |   1.776 ms |       0.46% |   1.773 ms |       0.52% | -3.072 us |  -0.17% |   SAME   |
|     2^8     |     64      |   1.776 ms |       0.46% |   1.774 ms |       0.58% | -2.048 us |  -0.12% |   SAME   |
|     2^8     |     64      |   1.776 ms |       0.46% |   1.773 ms |       0.58% | -3.072 us |  -0.17% |   SAME   |
```

and now it becomes

```

|  BlockSize  |  NumBlocks  |   Ref Time |   Ref Noise |   Cmp Time |   Cmp Noise |      Diff |   %Diff |  Status  |
|-------------|-------------|------------|-------------|------------|-------------|-----------|---------|----------|
|     2^8     |     64      |   1.776 ms |       0.46% |   1.777 ms |       0.40% |  1.024 us |   0.06% |   SAME   |
|     2^8     |     64      |   1.773 ms |       0.64% |   1.774 ms |       0.52% |  1.024 us |   0.06% |   SAME   |
|     2^8     |     64      |   1.774 ms |       0.46% |   1.773 ms |       0.52% | -1.024 us |  -0.06% |   SAME   |
|     2^8     |     64      |   1.773 ms |       0.46% |   1.774 ms |       0.58% |  1.024 us |   0.06% |   SAME   |
|     2^8     |     64      |   1.774 ms |       0.52% |   1.773 ms |       0.58% | -1.024 us |  -0.06% |   SAME   |
```

This matches the following raw data, as expected:

```
(py313) opavlyk@NV-22T4X34:~/repos/nvbench$ jq '. | .benchmarks[] | .states[] | .summaries[] | select(.tag == "nv/cold/time/gpu/median") | .data[] | .value' base.json
"0.0017756160497665405"
"0.0017725440263748169"
"0.001773568034172058"
"0.0017725440263748169"
"0.001773568034172058"

(py313) opavlyk@NV-22T4X34:~/repos/nvbench$ jq '. | .benchmarks[] | .states[] | .summaries[] | select(.tag == "nv/cold/time/gpu/median") | .data[] | .value' test.json
"0.0017766400575637818"
"0.001773568034172058"
"0.0017725440263748169"
"0.001773568034172058"
"0.0017725440263748169"
```

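2. nvbench_compare changes from using min_noise = min(ref_noise, cmp_noise) to using max_noise = max(ref_noise, cmp_noise).
   Using the larger of the ref and cmp noise levels as the threshold against which to gauge the relative timing difference makes more sense.
   The "unknown" status check now tests `max_noise is None` explicitly instead of relying on truthiness.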
---
 python/scripts/nvbench_compare.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/python/scripts/nvbench_compare.py b/python/scripts/nvbench_compare.py
index c637033..209e0d1 100644
--- a/python/scripts/nvbench_compare.py
+++ b/python/scripts/nvbench_compare.py
@@ -5,6 +5,7 @@ import math
 import os
 import sys
 from enum import StrEnum
+from itertools import islice
 
 import jsondiff
 import tabulate
@@ -347,11 +348,18 @@ def compare_benches(
         for cmp_device_id in cmp_device_ids:
             rows = []
             plot_data = {"cmp": {}, "ref": {}, "cmp_noise": {}, "ref_noise": {}}
+            counters = {}
 
             for cmp_state in cmp_states:
                 cmp_state_name = cmp_state["name"]
+                counters[cmp_state_name] = counters.get(cmp_state_name, 0) + 1
                 ref_state = next(
-                    filter(lambda st: st["name"] == cmp_state_name, ref_states), None
+                    islice(
+                        filter(lambda st: st["name"] == cmp_state_name, ref_states),
+                        counters[cmp_state_name] - 1,
+                        None,
+                    ),
+                    None,
                 )
                 if not ref_state:
                     continue
@@ -424,15 +432,15 @@ def compare_benches(
                 if ref_noise and cmp_noise:
                     ref_noise = float(ref_noise)
                     cmp_noise = float(cmp_noise)
-                    min_noise = min(ref_noise, cmp_noise)
+                    max_noise = max(ref_noise, cmp_noise)
                 elif ref_noise:
                     ref_noise = float(ref_noise)
-                    min_noise = ref_noise
+                    max_noise = ref_noise
                 elif cmp_noise:
                     cmp_noise = float(cmp_noise)
-                    min_noise = cmp_noise
+                    max_noise = cmp_noise
                 else:
-                    min_noise = None  # Noise is inf
+                    max_noise = None  # Noise is inf
 
                 if plot_along:
                     axis_name = []
@@ -461,11 +469,11 @@ def compare_benches(
                 global failure_count
 
                 config_count += 1
-                if not min_noise:
+                if max_noise is None:
                     unknown_count += 1
                     status_label = "????"
                     status = colorize(status_label, Fore.YELLOW, Emoji.YELLOW, no_color)
-                elif abs(frac_diff) <= min_noise:
+                elif abs(frac_diff) <= max_noise:
                     pass_count += 1
                     status_label = "SAME"
                     status = colorize(status_label, Fore.BLUE, Emoji.BLUE, no_color)
@@ -695,9 +703,9 @@ def main():
 
     print("# Summary\n")
     print("- Total Matches: %d" % config_count)
-    print("  - Pass    (diff <= min_noise): %d" % pass_count)
+    print("  - Pass    (diff <= max_noise): %d" % pass_count)
     print("  - Unknown (infinite noise):    %d" % unknown_count)
-    print("  - Failure (diff > min_noise):  %d" % failure_count)
+    print("  - Failure (diff > max_noise):  %d" % failure_count)
     return failure_count