mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-04-20 06:48:53 +00:00
Merge pull request #14 from vyasr/enhanced_compare
Improve compare output
This commit is contained in:
@@ -24,6 +24,8 @@
|
||||
#include <nvbench/device_manager.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <cstdint>
|
||||
@@ -40,22 +42,22 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values)
|
||||
const auto value_names = values.get_names();
|
||||
for (const auto &value_name : value_names)
|
||||
{
|
||||
const auto value_index = node.size();
|
||||
auto &value = node[value_index];
|
||||
|
||||
value["name"] = value_name;
|
||||
auto &value = node[value_name];
|
||||
|
||||
const auto type = values.get_type(value_name);
|
||||
switch (type)
|
||||
{
|
||||
case nvbench::named_values::type::int64:
|
||||
value["type"] = "int64";
|
||||
value["value"] = values.get_int64(value_name);
|
||||
// Write as a string; JSON encodes all numbers as double-precision
|
||||
// floats, which would truncate int64s.
|
||||
value["value"] = fmt::to_string(values.get_int64(value_name));
|
||||
break;
|
||||
|
||||
case nvbench::named_values::type::float64:
|
||||
value["type"] = "float64";
|
||||
value["value"] = values.get_float64(value_name);
|
||||
// Write as a string for consistency with int64.
|
||||
value["value"] = fmt::to_string(values.get_float64(value_name));
|
||||
break;
|
||||
|
||||
case nvbench::named_values::type::string:
|
||||
@@ -131,11 +133,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
auto &axes = bench["axes"];
|
||||
for (const auto &axis_ptr : bench_ptr->get_axes().get_axes())
|
||||
{
|
||||
const auto axis_index = axes.size();
|
||||
auto &axis = axes[axis_index];
|
||||
auto &axis = axes[axis_ptr->get_name()];
|
||||
|
||||
axis["index"] = axis_index;
|
||||
axis["name"] = axis_ptr->get_name();
|
||||
axis["type"] = axis_ptr->get_type_as_string();
|
||||
axis["flags"] = axis_ptr->get_flags_as_string();
|
||||
|
||||
@@ -178,11 +177,11 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
auto &states = bench["states"];
|
||||
for (const auto &exec_state : bench_ptr->get_states())
|
||||
{
|
||||
const auto state_index = states.size();
|
||||
auto &st = states[state_index];
|
||||
auto &st = states[exec_state.get_axis_values_as_string()];
|
||||
|
||||
st["index"] = state_index;
|
||||
st["description"] = exec_state.get_axis_values_as_string();
|
||||
// TODO: Determine if these need to be part of the state key as well
|
||||
// for uniqueness. The device already is, but the type config index is
|
||||
// not.
|
||||
st["device"] = exec_state.get_device()->get_id();
|
||||
st["type_config_index"] = exec_state.get_type_config_index();
|
||||
|
||||
@@ -197,13 +196,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
auto &summaries = st["summaries"];
|
||||
for (const auto &exec_summ : exec_state.get_summaries())
|
||||
{
|
||||
const auto summ_index = summaries.size();
|
||||
auto &summ = summaries[summ_index];
|
||||
|
||||
summ["index"] = summ_index;
|
||||
summ["name"] = exec_summ.get_name();
|
||||
|
||||
::write_named_values(summ["values"], exec_summ);
|
||||
auto &summ = summaries[exec_summ.get_name()];
|
||||
::write_named_values(summ, exec_summ);
|
||||
}
|
||||
|
||||
st["is_skipped"] = exec_state.is_skipped();
|
||||
|
||||
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
tabulate
|
||||
colorama
|
||||
244
scripts/nvbench_compare.py
Normal file
244
scripts/nvbench_compare.py
Normal file
@@ -0,0 +1,244 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from colorama import Fore
|
||||
import json
|
||||
import math
|
||||
import sys
|
||||
|
||||
import tabulate
|
||||
|
||||
# The script takes exactly two positional arguments: the reference JSON file
# followed by the JSON file to compare against it.
if len(sys.argv) != 3:
    print("Usage: %s reference.json compare.json\n" % sys.argv[0])
    sys.exit(1)

# Parse both result files up front; everything below works on the decoded
# JSON documents.
with open(sys.argv[1], "r") as ref_file:
    ref_root = json.load(ref_file)

with open(sys.argv[2], "r") as cmp_file:
    cmp_root = json.load(cmp_file)

# Coarse sanity check: the two runs are only compared when their device
# sections are identical.
if ref_root["devices"] != cmp_root["devices"]:
    print("Device sections do not match.")
    sys.exit(1)

# Device descriptions used to resolve device ids while printing.
all_devices = cmp_root["devices"]

# Running tallies updated by compare_benches() and reported in the summary.
config_count = unknown_count = failure_count = pass_count = 0
|
||||
|
||||
|
||||
def find_matching_bench(needle, haystack):
    """Return the first benchmark in ``haystack`` equivalent to ``needle``.

    Two benchmarks are considered equivalent when both their ``"name"`` and
    their ``"axes"`` entries compare equal. Returns ``None`` when no entry
    of ``haystack`` matches.
    """
    candidates = (
        bench
        for bench in haystack
        if bench["name"] == needle["name"] and bench["axes"] == needle["axes"]
    )
    return next(candidates, None)
|
||||
|
||||
|
||||
def find_device_by_id(device_id):
    """Look up a device description by its ``"id"`` field.

    Searches the module-level ``all_devices`` list and returns the first
    entry whose ``"id"`` equals ``device_id``, or ``None`` if there is none.
    """
    matches = (entry for entry in all_devices if entry["id"] == device_id)
    return next(matches, None)
|
||||
|
||||
|
||||
def format_int64_axis_value(axis_name, axis_value, axes):
    """Format the value of an int64 axis for display.

    Args:
        axis_name: Key of the axis definition inside ``axes``.
        axis_value: Mapping whose ``"value"`` entry holds the axis value,
            either as an integer or as its decimal string encoding.
        axes: Mapping from axis name to axis definition; the definition's
            ``"flags"`` entry selects the output style.

    Returns:
        ``"2^<exponent>"`` when the axis is flagged ``"pow2"`` and the value
        is positive, otherwise the plain decimal representation.
    """
    axis_flags = axes[axis_name]["flags"]
    # The JSON writer stores int64 values as strings to avoid precision loss,
    # so convert back to an integer before doing arithmetic or %d formatting.
    value = int(axis_value["value"])
    # log2 is undefined for non-positive values; print those as plain
    # integers rather than raising.
    if axis_flags == "pow2" and value > 0:
        return "2^%d" % math.log2(value)
    return "%d" % value
|
||||
|
||||
|
||||
def format_float64_axis_value(axis_name, axis_value, axes):
    """Format the value of a float64 axis with five significant digits.

    ``axis_name`` and ``axes`` are accepted for signature parity with the
    other axis formatters and are not used.

    Args:
        axis_value: Mapping whose ``"value"`` entry holds the axis value,
            either as a number or as its string encoding.

    Returns:
        The value rendered with the ``%.5g`` format.
    """
    # The JSON writer stores float64 values as strings, so convert before
    # applying the numeric format.
    return "%.5g" % float(axis_value["value"])
|
||||
|
||||
|
||||
def format_type_axis_value(axis_name, axis_value, axes):
    """Format the value of a type axis.

    Only ``axis_value["value"]`` is consulted; ``axis_name`` and ``axes``
    are unused and exist for signature parity with the other formatters.
    """
    rendered = axis_value["value"]
    return "%s" % rendered
|
||||
|
||||
|
||||
def format_string_axis_value(axis_name, axis_value, axes):
    """Format the value of a string axis.

    Only ``axis_value["value"]`` is consulted; ``axis_name`` and ``axes``
    are unused and exist for signature parity with the other formatters.
    """
    text = axis_value["value"]
    return "%s" % text
|
||||
|
||||
|
||||
def format_axis_value(axis_name, axis_value, axes):
    """Format an axis value using the formatter for the axis's type.

    The axis definition is looked up in ``axes`` by ``axis_name`` and its
    ``"type"`` entry selects one of the int64, float64, type or string
    formatters. Returns ``None`` when the type is none of those.
    """
    kind = axes[axis_name]["type"]
    formatters = (
        ("int64", format_int64_axis_value),
        ("float64", format_float64_axis_value),
        ("type", format_type_axis_value),
        ("string", format_string_axis_value),
    )
    for type_name, formatter in formatters:
        if kind == type_name:
            return formatter(axis_name, axis_value, axes)
    return None
|
||||
|
||||
|
||||
def format_duration(seconds):
    """Format a duration given in seconds using a human-friendly unit.

    The unit is chosen from the magnitude of ``seconds`` so that negative
    durations (for example a time difference where the compared run is
    faster) use the same unit as positive durations of the same size.

    Args:
        seconds: Duration in seconds; may be negative.

    Returns:
        The scaled value with three decimals followed by ``s``, ``ms`` or
        ``us``. Magnitudes below one microsecond are still reported in
        microseconds.
    """
    magnitude = abs(seconds)
    if magnitude >= 1:
        multiplier = 1.0
        units = "s"
    elif magnitude >= 1e-3:
        multiplier = 1e3
        units = "ms"
    else:
        # Microseconds are the smallest unit reported.
        multiplier = 1e6
        units = "us"
    return "%0.3f %s" % (seconds * multiplier, units)
|
||||
|
||||
|
||||
def format_percentage(percentage):
    """Format a fraction as a percentage with two decimals.

    Args:
        percentage: Fraction to format (``0.05`` means 5%), or ``None``.

    Returns:
        ``"inf"`` when ``percentage`` is ``None``, otherwise the value
        multiplied by 100 and suffixed with ``%``.
    """
    # When there aren't enough samples for a meaningful noise measurement,
    # the noise is recorded as infinity. The JSON spec has no representation
    # for inf, so such values arrive as null (None). Only None is treated
    # this way: a genuine 0.0 must be printed as "0.00%".
    if percentage is None:
        return "inf"
    return "%0.2f%%" % (percentage * 100.0)
|
||||
|
||||
|
||||
def _parse_noise(raw):
    """Convert a raw noise entry to a finite float, or None when unusable.

    Missing/empty entries and non-finite values are both mapped to ``None``
    so callers can treat "no meaningful noise measurement" uniformly.
    """
    if not raw:
        return None
    noise = float(raw)
    # NOTE(review): infinite noise is assumed to arrive either as JSON null
    # or as a string that float() parses to inf -- confirm against the writer.
    return noise if math.isfinite(noise) else None


def compare_benches(ref_benches, cmp_benches):
    """Print a per-device comparison table for every matching benchmark.

    For each benchmark in ``cmp_benches`` that has a counterpart in
    ``ref_benches`` (same name and axes), the cold GPU time and its relative
    standard deviation are compared state by state. A state passes when the
    absolute fractional time difference does not exceed the smaller of the
    two noise values; it is counted as unknown when no usable noise value is
    available.

    Side effects: prints markdown tables to stdout and updates the
    module-level counters ``config_count``, ``unknown_count``,
    ``pass_count`` and ``failure_count``.

    Args:
        ref_benches: List of benchmark objects from the reference run.
        cmp_benches: List of benchmark objects from the run being compared.
    """
    global config_count
    global unknown_count
    global pass_count
    global failure_count

    time_key = "Average GPU Time (Cold)"
    noise_key = "GPU Relative Standard Deviation (Cold)"

    for cmp_bench in cmp_benches:
        ref_bench = find_matching_bench(cmp_bench, ref_benches)
        if not ref_bench:
            continue

        print("# %s\n" % (cmp_bench["name"]))

        device_ids = cmp_bench["devices"]
        axes = cmp_bench["axes"]
        ref_states = ref_bench["states"]
        cmp_states = cmp_bench["states"]

        # One centered column per axis, followed by the fixed result columns.
        headers = list(axes.keys())
        colalign = ["center"] * len(headers)
        for title, alignment in (("Ref Time", "right"),
                                 ("Ref Noise", "right"),
                                 ("Cmp Time", "right"),
                                 ("Cmp Noise", "right"),
                                 ("Diff", "right"),
                                 ("%Diff", "right"),
                                 ("Status", "center")):
            headers.append(title)
            colalign.append(alignment)

        for device_id in device_ids:
            device = find_device_by_id(device_id)
            # Tolerate a device id with no description instead of crashing.
            device_name = device["name"] if device else "Unknown device"
            print("## [%d] %s\n" % (device_id, device_name))

            rows = []
            for cmp_state_name in cmp_states:
                cmp_state = cmp_states[cmp_state_name]

                # Only list states recorded for the device being printed;
                # otherwise each state is repeated under every device heading
                # and counted more than once. States without a "device"
                # entry are kept.
                if cmp_state.get("device", device_id) != device_id:
                    continue

                # A state missing from the reference run cannot be compared.
                ref_state = ref_states.get(cmp_state_name)
                if not ref_state:
                    continue

                axis_values = cmp_state["axis_values"]
                row = []
                for axis_value_name in axis_values:
                    axis_value = axis_values[axis_value_name]
                    row.append(format_axis_value(axis_value_name,
                                                 axis_value,
                                                 axes))

                cmp_summaries = cmp_state["summaries"]
                ref_summaries = ref_state["summaries"]

                if not ref_summaries or not cmp_summaries:
                    continue

                cmp_time_summary = cmp_summaries.get(time_key)
                ref_time_summary = ref_summaries.get(time_key)
                cmp_noise_summary = cmp_summaries.get(noise_key)
                ref_noise_summary = ref_summaries.get(noise_key)

                # TODO: Use other timings, too. Maybe multiple rows, with a
                # "Timing" column + values "CPU/GPU/Batch"?
                if not all([cmp_time_summary,
                            ref_time_summary,
                            cmp_noise_summary,
                            ref_noise_summary]):
                    continue

                # Values are string-encoded in the JSON; convert to numerics
                # before doing any arithmetic on them.
                cmp_time = float(cmp_time_summary["value"]["value"])
                ref_time = float(ref_time_summary["value"]["value"])
                cmp_noise = _parse_noise(cmp_noise_summary["value"]["value"])
                ref_noise = _parse_noise(ref_noise_summary["value"]["value"])

                diff = cmp_time - ref_time
                frac_diff = diff / ref_time

                # Use the smaller of the available noise values as threshold.
                known_noise = [noise for noise in (ref_noise, cmp_noise)
                               if noise is not None]
                min_noise = min(known_noise) if known_noise else None

                config_count += 1
                if not min_noise:
                    unknown_count += 1
                    status = Fore.YELLOW + "????" + Fore.RESET
                elif abs(frac_diff) <= min_noise:
                    pass_count += 1
                    status = Fore.GREEN + "PASS" + Fore.RESET
                else:
                    failure_count += 1
                    status = Fore.RED + "FAIL" + Fore.RESET

                row.append(format_duration(ref_time))
                row.append(format_percentage(ref_noise))
                row.append(format_duration(cmp_time))
                row.append(format_percentage(cmp_noise))
                row.append(format_duration(diff))
                row.append(format_percentage(frac_diff))
                row.append(status)

                rows.append(row)

            # Nothing to tabulate when no state was comparable for this
            # device; skip the table but keep the trailing blank line.
            if rows:
                print(tabulate.tabulate(rows,
                                        headers=headers,
                                        colalign=colalign,
                                        tablefmt="github"))
            print("")
|
||||
|
||||
|
||||
# Run the comparison over every benchmark and print the tables.
compare_benches(ref_root["benchmarks"], cmp_root["benchmarks"])

print("# Summary\n")
print("- Total Matches: %d" % config_count)
print("  - Pass    (diff <= min_noise): %d" % pass_count)
print("  - Unknown (infinite noise):    %d" % unknown_count)
print("  - Failure (diff > min_noise):  %d" % failure_count)

# The exit status reports the number of failures. Process exit statuses are
# limited to 8 bits on POSIX systems, so an unclamped count that is a
# multiple of 256 would be reported as success; clamp to 255.
sys.exit(min(failure_count, 255))
|
||||
23045
scripts/test_cmp.json
Normal file
23045
scripts/test_cmp.json
Normal file
File diff suppressed because it is too large
Load Diff
23045
scripts/test_ref.json
Normal file
23045
scripts/test_ref.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user