Merge pull request #14 from vyasr/enhanced_compare

Improve compare output
2026-04-20 06:48:53 +00:00 · 2021-06-22 16:27:03 -04:00
parent ff507596bf 861f66c161
commit 5fab2536e5
5 changed files with 46351 additions and 21 deletions
--- a/nvbench/json_printer.cu
+++ b/nvbench/json_printer.cu
@@ -24,6 +24,8 @@
 #include <nvbench/device_manager.cuh>
 #include <nvbench/summary.cuh>

+#include <fmt/format.h>
+
 #include <nlohmann/json.hpp>

 #include <cstdint>
@@ -40,22 +42,22 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values)
  const auto value_names = values.get_names();
  for (const auto &value_name : value_names)
  {
-    const auto value_index = node.size();
-    auto &value            = node[value_index];
-
-    value["name"] = value_name;
+    auto &value            = node[value_name];

    const auto type = values.get_type(value_name);
    switch (type)
    {
      case nvbench::named_values::type::int64:
        value["type"]  = "int64";
-        value["value"] = values.get_int64(value_name);
+        // Write as a string; many JSON parsers read every number as a
+        // double-precision float, which would lose precision for large int64s.
+        value["value"] = fmt::to_string(values.get_int64(value_name));
        break;

      case nvbench::named_values::type::float64:
        value["type"]  = "float64";
-        value["value"] = values.get_float64(value_name);
+        // Write as a string for consistency with int64.
+        value["value"] = fmt::to_string(values.get_float64(value_name));
        break;

      case nvbench::named_values::type::string:
@@ -131,11 +133,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
      auto &axes = bench["axes"];
      for (const auto &axis_ptr : bench_ptr->get_axes().get_axes())
      {
-        const auto axis_index = axes.size();
-        auto &axis            = axes[axis_index];
+        auto &axis = axes[axis_ptr->get_name()];

-        axis["index"] = axis_index;
-        axis["name"]  = axis_ptr->get_name();
        axis["type"]  = axis_ptr->get_type_as_string();
        axis["flags"] = axis_ptr->get_flags_as_string();

@@ -178,11 +177,11 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
      auto &states = bench["states"];
      for (const auto &exec_state : bench_ptr->get_states())
      {
-        const auto state_index = states.size();
-        auto &st               = states[state_index];
+        auto &st               = states[exec_state.get_axis_values_as_string()];

-        st["index"]             = state_index;
-        st["description"]       = exec_state.get_axis_values_as_string();
+        // TODO: Determine if these need to be part of the state key as well
+        // for uniqueness. The device already is, but the type config index is
+        // not.
        st["device"]            = exec_state.get_device()->get_id();
        st["type_config_index"] = exec_state.get_type_config_index();

@@ -197,13 +196,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
        auto &summaries = st["summaries"];
        for (const auto &exec_summ : exec_state.get_summaries())
        {
-          const auto summ_index = summaries.size();
-          auto &summ            = summaries[summ_index];
-
-          summ["index"] = summ_index;
-          summ["name"]  = exec_summ.get_name();
-
-          ::write_named_values(summ["values"], exec_summ);
+          auto &summ            = summaries[exec_summ.get_name()];
+          ::write_named_values(summ, exec_summ);
        }

        st["is_skipped"] = exec_state.is_skipped();
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+tabulate
+colorama
--- a/scripts/nvbench_compare.py
+++ b/scripts/nvbench_compare.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python
+
+from colorama import Fore
+import json
+import math
+import sys
+
+import tabulate
+
+# Exactly two positional arguments are expected: the reference results file
+# followed by the results file to compare against it.
+if len(sys.argv) != 3:
+    print("Usage: %s reference.json compare.json\n" % sys.argv[0])
+    sys.exit(1)
+
+# Load the reference run.
+with open(sys.argv[1], "r") as ref_file:
+    ref_root = json.load(ref_file)
+
+# Load the run being compared.
+with open(sys.argv[2], "r") as cmp_file:
+    cmp_root = json.load(cmp_file)
+
+# This is blunt but works for now: both runs must describe the same devices.
+if ref_root["devices"] != cmp_root["devices"]:
+    print("Device sections do not match.")
+    sys.exit(1)
+
+# Device descriptions used to label the per-device tables.
+all_devices = cmp_root["devices"]
+
+# Running totals updated by compare_benches and printed in the final summary.
+config_count = unknown_count = failure_count = pass_count = 0
+
+
+def find_matching_bench(needle, haystack):
+    """Return the first entry of ``haystack`` matching ``needle``.
+
+    Two entries match when both their "name" and their "axes" values are
+    equal. Returns None when no entry matches.
+    """
+    matches = (
+        candidate
+        for candidate in haystack
+        if candidate["name"] == needle["name"]
+        and candidate["axes"] == needle["axes"]
+    )
+    return next(matches, None)
+
+
+def find_device_by_id(device_id):
+    """Return the entry of the module-level ``all_devices`` whose "id" equals
+    ``device_id``, or None when there is no such entry."""
+    return next(
+        (entry for entry in all_devices if entry["id"] == device_id),
+        None,
+    )
+
+
+def format_int64_axis_value(axis_name, axis_value, axes):
+    """Format an int64 axis value for display.
+
+    ``axes[axis_name]["flags"]`` selects the notation: values on a "pow2"
+    axis are shown as "2^<exponent>", everything else as a plain integer.
+    """
+    axis_def = axes[axis_name]
+    axis_flags = axis_def["flags"]
+    # The JSON printer writes int64 named values as decimal strings, so
+    # convert before doing arithmetic. int() also accepts a plain number,
+    # which keeps numerically-encoded files working.
+    value = int(axis_value["value"])
+    if axis_flags == "pow2":
+        return "2^%d" % math.log2(value)
+    return "%d" % value
+
+
+def format_float64_axis_value(axis_name, axis_value, axes):
+    """Format a float64 axis value with up to five significant digits.
+
+    ``axis_name`` and ``axes`` are unused; they are accepted so that all
+    axis formatters share one signature.
+    """
+    # The JSON printer writes float64 named values as strings; float() accepts
+    # both that encoding and a plain number.
+    return "%.5g" % float(axis_value["value"])
+
+
+def format_type_axis_value(axis_name, axis_value, axes):
+    """Format a type axis value as text.
+
+    Only ``axis_value["value"]`` is used; the other parameters exist so that
+    all axis formatters share one signature.
+    """
+    shown = axis_value["value"]
+    return "%s" % shown
+
+
+def format_string_axis_value(axis_name, axis_value, axes):
+    """Format a string axis value as text.
+
+    Only ``axis_value["value"]`` is used; the other parameters exist so that
+    all axis formatters share one signature.
+    """
+    shown = axis_value["value"]
+    return "%s" % shown
+
+
+def format_axis_value(axis_name, axis_value, axes):
+    """Format ``axis_value`` with the formatter for its axis type.
+
+    The type is read from ``axes[axis_name]["type"]``. Returns None when the
+    type is not one of "int64", "float64", "type" or "string".
+    """
+    formatters = (
+        ("int64", format_int64_axis_value),
+        ("float64", format_float64_axis_value),
+        ("type", format_type_axis_value),
+        ("string", format_string_axis_value),
+    )
+    declared_type = axes[axis_name]["type"]
+    for type_name, formatter in formatters:
+        if declared_type == type_name:
+            return formatter(axis_name, axis_value, axes)
+    return None
+
+
+def format_duration(seconds):
+    """Format a duration given in seconds with three decimals and a unit.
+
+    The unit ("s", "ms" or "us") is chosen from the magnitude of ``seconds``,
+    so negative durations, such as the difference when the compared run is
+    faster, are scaled the same way as positive ones. Anything below one
+    millisecond is reported in microseconds.
+    """
+    magnitude = abs(seconds)
+    if magnitude >= 1:
+        multiplier, units = 1.0, "s"
+    elif magnitude >= 1e-3:
+        multiplier, units = 1e3, "ms"
+    else:
+        multiplier, units = 1e6, "us"
+    return "%0.3f %s" % (seconds * multiplier, units)
+
+
+def format_percentage(percentage):
+    """Format a fraction as a percentage with two decimals.
+
+    Returns "inf" when ``percentage`` is None or infinite.
+    """
+    # When there aren't enough samples for a meaningful noise measurement,
+    # the noise is recorded as infinity. The JSON spec doesn't allow inf, so
+    # such values can arrive as null. Test for None explicitly: a plain
+    # truthiness check would also turn a legitimate 0.0 (e.g. identical
+    # timings) into "inf".
+    if percentage is None or math.isinf(percentage):
+        return "inf"
+    return "%0.2f%%" % (percentage * 100.0)
+
+
+def _parse_float(raw):
+    """Convert a JSON value (number, numeric string or null) to a float.
+
+    Returns None when the value is null or cannot be parsed as a number.
+    """
+    if raw is None:
+        return None
+    try:
+        return float(raw)
+    except (TypeError, ValueError):
+        return None
+
+
+def _parse_noise(raw):
+    """Parse a noise value, mapping null, unparsable and non-finite to None."""
+    noise = _parse_float(raw)
+    if noise is None or not math.isfinite(noise):
+        return None
+    return noise
+
+
+def compare_benches(ref_benches, cmp_benches):
+    """Print per-device comparison tables for benchmarks found in both runs.
+
+    For every benchmark in ``cmp_benches`` that has a match in
+    ``ref_benches`` (same name and axes), each state present in both runs is
+    compared using the "Average GPU Time (Cold)" and
+    "GPU Relative Standard Deviation (Cold)" summaries. A row passes when the
+    relative time difference is within the smaller of the two noise values;
+    rows without a usable noise value are counted as unknown.
+
+    Updates the module-level counters ``config_count``, ``unknown_count``,
+    ``pass_count`` and ``failure_count``.
+    """
+    global config_count
+    global unknown_count
+    global pass_count
+    global failure_count
+
+    time_key = "Average GPU Time (Cold)"
+    noise_key = "GPU Relative Standard Deviation (Cold)"
+
+    for cmp_bench in cmp_benches:
+        ref_bench = find_matching_bench(cmp_bench, ref_benches)
+        if not ref_bench:
+            continue
+
+        print("# %s\n" % (cmp_bench["name"]))
+
+        device_ids = cmp_bench["devices"]
+        axes = cmp_bench["axes"]
+        ref_states = ref_bench["states"]
+        cmp_states = cmp_bench["states"]
+
+        # One centered column per axis, followed by the fixed result columns.
+        headers = list(axes.keys()) + [
+            "Ref Time",
+            "Ref Noise",
+            "Cmp Time",
+            "Cmp Noise",
+            "Diff",
+            "%Diff",
+            "Status",
+        ]
+        colalign = ["center"] * len(axes) + ["right"] * 6 + ["center"]
+
+        for device_id in device_ids:
+            device = find_device_by_id(device_id)
+            # Don't crash on a device id missing from the devices section.
+            device_name = device["name"] if device else "Unknown device"
+            print("## [%d] %s\n" % (device_id, device_name))
+
+            rows = []
+            for cmp_state_name, cmp_state in cmp_states.items():
+                # Each state records the device it ran on; list it only
+                # under that device so rows and counters are not duplicated
+                # once per device. States without a "device" entry are kept.
+                if cmp_state.get("device", device_id) != device_id:
+                    continue
+
+                # A state may exist only in the compared run; skip it
+                # instead of raising KeyError.
+                ref_state = ref_states.get(cmp_state_name)
+                if not ref_state:
+                    continue
+
+                axis_values = cmp_state["axis_values"]
+                row = [
+                    format_axis_value(axis_value_name, axis_value, axes)
+                    for axis_value_name, axis_value in axis_values.items()
+                ]
+
+                cmp_summaries = cmp_state["summaries"]
+                ref_summaries = ref_state["summaries"]
+
+                if not ref_summaries or not cmp_summaries:
+                    continue
+
+                cmp_time_summary = cmp_summaries.get(time_key)
+                ref_time_summary = ref_summaries.get(time_key)
+                cmp_noise_summary = cmp_summaries.get(noise_key)
+                ref_noise_summary = ref_summaries.get(noise_key)
+
+                # TODO: Use other timings, too. Maybe multiple rows, with a
+                # "Timing" column + values "CPU/GPU/Batch"?
+                if not all([cmp_time_summary,
+                            ref_time_summary,
+                            cmp_noise_summary,
+                            ref_noise_summary]):
+                    continue
+
+                # Values are string-encoded in the JSON; convert them to
+                # numbers before doing any arithmetic.
+                cmp_time = _parse_float(cmp_time_summary["value"]["value"])
+                ref_time = _parse_float(ref_time_summary["value"]["value"])
+                cmp_noise = _parse_noise(cmp_noise_summary["value"]["value"])
+                ref_noise = _parse_noise(ref_noise_summary["value"]["value"])
+
+                # Without both times, or with a zero reference time, the
+                # relative difference is undefined.
+                if cmp_time is None or not ref_time:
+                    continue
+
+                diff = cmp_time - ref_time
+                frac_diff = diff / ref_time
+
+                # Use the smaller of the noise values that are available;
+                # None means neither run has a finite noise measurement.
+                known_noise = [noise
+                               for noise in (ref_noise, cmp_noise)
+                               if noise is not None]
+                min_noise = min(known_noise) if known_noise else None
+
+                config_count += 1
+                if not min_noise:
+                    unknown_count += 1
+                    status = Fore.YELLOW + "????" + Fore.RESET
+                elif abs(frac_diff) <= min_noise:
+                    pass_count += 1
+                    status = Fore.GREEN + "PASS" + Fore.RESET
+                else:
+                    failure_count += 1
+                    status = Fore.RED + "FAIL" + Fore.RESET
+
+                row.append(format_duration(ref_time))
+                row.append(format_percentage(ref_noise))
+                row.append(format_duration(cmp_time))
+                row.append(format_percentage(cmp_noise))
+                row.append(format_duration(diff))
+                row.append(format_percentage(frac_diff))
+                row.append(status)
+
+                rows.append(row)
+
+            print(tabulate.tabulate(rows,
+                                    headers=headers,
+                                    colalign=colalign,
+                                    tablefmt="github"))
+            print("")
+
+
+# Run the comparison; this prints the tables and fills in the counters.
+compare_benches(ref_root["benchmarks"], cmp_root["benchmarks"])
+
+print("# Summary\n")
+print("- Total Matches: %d" % config_count)
+print("  - Pass    (diff <= min_noise): %d" % pass_count)
+print("  - Unknown (infinite noise):    %d" % unknown_count)
+print("  - Failure (diff > min_noise):  %d" % failure_count)
+
+# Exit with the number of failures so that callers can detect regressions.
+# POSIX keeps only the low 8 bits of an exit status, so clamp the value:
+# otherwise exactly 256 failures would be reported as success.
+sys.exit(min(failure_count, 255))
--- a/scripts/test_cmp.json
+++ b/scripts/test_cmp.json
--- a/scripts/test_ref.json
+++ b/scripts/test_ref.json