diff --git a/nvbench/json_printer.cu b/nvbench/json_printer.cu index 9699281..43e70c7 100644 --- a/nvbench/json_printer.cu +++ b/nvbench/json_printer.cu @@ -67,7 +67,8 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values) const auto value_names = values.get_names(); for (const auto &value_name : value_names) { - auto &value = node[value_name]; + auto &value = node.emplace_back(); + value["name"] = value_name; const auto type = values.get_type(value_name); switch (type) @@ -89,6 +90,9 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values) value["type"] = "string"; value["value"] = values.get_string(value_name); break; + + default: + NVBENCH_THROW(std::runtime_error, "Unrecognized value type."); } // end switch (value type) } // end foreach value name } @@ -209,7 +213,7 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) auto &devices = root["devices"]; for (const auto &dev_info : nvbench::device_manager::get().get_devices()) { - auto &device = devices[devices.size()]; + auto &device = devices.emplace_back(); device["id"] = dev_info.get_id(); device["name"] = dev_info.get_name(); device["sm_version"] = dev_info.get_sm_version(); @@ -241,10 +245,10 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) for (const auto &bench_ptr : benches) { const auto bench_index = benchmarks.size(); - auto &bench = benchmarks[bench_index]; + auto &bench = benchmarks.emplace_back(); - bench["index"] = bench_index; bench["name"] = bench_ptr->get_name(); + bench["index"] = bench_index; bench["min_samples"] = bench_ptr->get_min_samples(); bench["min_time"] = bench_ptr->get_min_time(); @@ -261,8 +265,9 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) auto &axes = bench["axes"]; for (const auto &axis_ptr : bench_ptr->get_axes().get_axes()) { - auto &axis = axes[axis_ptr->get_name()]; + auto &axis = axes.emplace_back(); + axis["name"] = 
axis_ptr->get_name(); axis["type"] = axis_ptr->get_type_as_string(); axis["flags"] = axis_ptr->get_flags_as_string(); @@ -270,8 +275,7 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) const auto axis_size = axis_ptr->get_size(); for (std::size_t i = 0; i < axis_size; ++i) { - const auto value_idx = values.size(); - auto &value = values[value_idx]; + auto &value = values.emplace_back(); value["input_string"] = axis_ptr->get_input_string(i); value["description"] = axis_ptr->get_description(i); @@ -305,13 +309,9 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) auto &states = bench["states"]; for (const auto &exec_state : bench_ptr->get_states()) { - auto &st = states[exec_state.get_axis_values_as_string()]; + auto &st = states.emplace_back(); - // TODO: Determine if these need to be part of the state key as well - // for uniqueness. The device already is, but the type config index is - // not. - st["device"] = exec_state.get_device()->get_id(); - st["type_config_index"] = exec_state.get_type_config_index(); + st["name"] = exec_state.get_axis_values_as_string(); st["min_samples"] = exec_state.get_min_samples(); st["min_time"] = exec_state.get_min_time(); @@ -319,13 +319,50 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) st["skip_time"] = exec_state.get_skip_time(); st["timeout"] = exec_state.get_timeout(); + st["device"] = exec_state.get_device()->get_id(); + st["type_config_index"] = exec_state.get_type_config_index(); + + // TODO I'd like to replace this with: + // [ {"name" : <axis name>, "index": <value_index>}, ...] + // but it would take some refactoring in the data structures to get + // that information through.
::write_named_values(st["axis_values"], exec_state.get_axis_values()); auto &summaries = st["summaries"]; for (const auto &exec_summ : exec_state.get_summaries()) { - auto &summ = summaries[exec_summ.get_tag()]; - ::write_named_values(summ, exec_summ); + auto &summ = summaries.emplace_back(); + summ["tag"] = exec_summ.get_tag(); + + // Write out the expected values as simple key/value pairs + nvbench::named_values summary_values = exec_summ; + if (summary_values.has_value("name")) + { + summ["name"] = summary_values.get_string("name"); + summary_values.remove_value("name"); + } + if (summary_values.has_value("description")) + { + summ["description"] = summary_values.get_string("description"); + summary_values.remove_value("description"); + } + if (summary_values.has_value("hint")) + { + summ["hint"] = summary_values.get_string("hint"); + summary_values.remove_value("hint"); + } + if (summary_values.has_value("hide")) + { + summ["hide"] = summary_values.get_string("hide"); + summary_values.remove_value("hide"); + } + + // Write any additional values generically in + // ["data"] = [{name,type,value}, ...]: + if (summary_values.get_size() != 0) + { + ::write_named_values(summ["data"], summary_values); + } } st["is_skipped"] = exec_state.is_skipped(); diff --git a/scripts/nvbench_compare.py b/scripts/nvbench_compare.py index a9359fa..ac2509e 100755 --- a/scripts/nvbench_compare.py +++ b/scripts/nvbench_compare.py @@ -10,10 +10,12 @@ from colorama import Fore import tabulate + # Parse version string into tuple, "x.y.z" -> (x, y, z) def version_tuple(v): return tuple(map(int, (v.split(".")))) + tabulate_version = version_tuple(tabulate.__version__) all_devices = [] @@ -38,8 +40,8 @@ def find_device_by_id(device_id): def format_int64_axis_value(axis_name, axis_value, axes): - axis_def = axes[axis_name] - axis_flags = axis_def["flags"] + axis = next(filter(lambda ax: ax["name"] == axis_name, axes)) + axis_flags = axis["flags"] value = int(axis_value["value"]) if axis_flags 
== "pow2": value = math.log2(value) @@ -60,8 +62,8 @@ def format_string_axis_value(axis_name, axis_value, axes): def format_axis_value(axis_name, axis_value, axes): - axis_def = axes[axis_name] - axis_type = axis_def["type"] + axis = next(filter(lambda ax: ax["name"] == axis_name, axes)) + axis_type = axis["type"] if axis_type == "int64": return format_int64_axis_value(axis_name, axis_value, axes) elif axis_type == "float64": @@ -110,7 +112,7 @@ def compare_benches(ref_benches, cmp_benches, threshold): ref_states = ref_bench["states"] cmp_states = cmp_bench["states"] - headers = list(axes.keys()) if axes else [] + headers = [x["name"] for x in axes] colalign = ["center"] * len(headers) headers.append("Ref Time") @@ -131,9 +133,11 @@ def compare_benches(ref_benches, cmp_benches, threshold): for device_id in device_ids: rows = [] - for cmp_state_name in cmp_states: - cmp_state = cmp_states[cmp_state_name] - ref_state = ref_states[cmp_state_name] + for cmp_state in cmp_states: + cmp_state_name = cmp_state["name"] + ref_state = next(filter(lambda st: st["name"] == cmp_state_name, + ref_states), + None) if not ref_state: continue @@ -142,8 +146,8 @@ def compare_benches(ref_benches, cmp_benches, threshold): axis_values = [] row = [] - for axis_value_name in axis_values: - axis_value = axis_values[axis_value_name] + for axis_value in axis_values: + axis_value_name = axis_value["name"] row.append(format_axis_value(axis_value_name, axis_value, axes)) @@ -154,14 +158,13 @@ def compare_benches(ref_benches, cmp_benches, threshold): if not ref_summaries or not cmp_summaries: continue - cmp_time_summary = cmp_summaries.get("nv/cold/time/gpu/mean") - ref_time_summary = ref_summaries.get("nv/cold/time/gpu/mean") - cmp_noise_summary = cmp_summaries.get( - "nv/cold/time/gpu/stdev/relative" - ) - ref_noise_summary = ref_summaries.get( - "nv/cold/time/gpu/stdev/relative" - ) + def lookup_summary(summaries, tag): + return next(filter(lambda s: s["tag"] == tag, summaries), None) + + 
cmp_time_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/mean") + ref_time_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/mean") + cmp_noise_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/stdev/relative") + ref_noise_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/stdev/relative") # TODO: Use other timings, too. Maybe multiple rows, with a # "Timing" column + values "CPU/GPU/Batch"? @@ -171,10 +174,16 @@ def compare_benches(ref_benches, cmp_benches, threshold): ref_noise_summary]): continue - cmp_time = cmp_time_summary["value"]["value"] - ref_time = ref_time_summary["value"]["value"] - cmp_noise = cmp_noise_summary["value"]["value"] - ref_noise = ref_noise_summary["value"]["value"] + def extract_value(summary): + summary_data = summary["data"] + value_data = next(filter(lambda v: v["name"] == "value", summary_data)) + assert(value_data["type"] == "float64") + return value_data["value"] + + cmp_time = extract_value(cmp_time_summary) + ref_time = extract_value(ref_time_summary) + cmp_noise = extract_value(cmp_noise_summary) + ref_noise = extract_value(ref_noise_summary) # Convert string encoding to expected numerics: cmp_time = float(cmp_time) @@ -223,7 +232,6 @@ def compare_benches(ref_benches, cmp_benches, threshold): rows.append(row) - if len(rows) == 0: continue @@ -244,13 +252,12 @@ def compare_benches(ref_benches, cmp_benches, threshold): def main(): - help_text = "%(prog)s [reference.json compare.json | reference_dir/ compare_dir/]" parser = argparse.ArgumentParser(prog='nvbench_compare', usage=help_text) - parser.add_argument('--threshold-diff',type=float, dest='threshold', default=0.0, + parser.add_argument('--threshold-diff', type=float, dest='threshold', default=0.0, help='only show benchmarks where percentage diff is >= THRESHOLD') - args,files_or_dirs = parser.parse_known_args() + args, files_or_dirs = parser.parse_known_args() print(files_or_dirs) if len(files_or_dirs) != 2: @@ -270,9 +277,9 @@ def main(): 
os.path.getsize(r) > 0 and os.path.getsize(c) > 0: to_compare.append((r, c)) else: - to_compare = [(files_or_dirs[0],files_or_dirs[1])] + to_compare = [(files_or_dirs[0], files_or_dirs[1])] - for ref,comp in to_compare: + for ref, comp in to_compare: with open(ref, "r") as ref_file: ref_root = json.load(ref_file) diff --git a/scripts/nvbench_walltime.py b/scripts/nvbench_walltime.py index 611ecfc..fb044cb 100644 --- a/scripts/nvbench_walltime.py +++ b/scripts/nvbench_walltime.py @@ -62,16 +62,21 @@ def init_measures(): def get_measures(state): + summaries = state["summaries"] times = {} for name in measure_names: - try: - time = state["summaries"]["nv/%s/walltime" % name]["value"]["value"] - time = float(time) - except KeyError: - time = None - except TypeError: - time = None - times[name] = time if time else 0. + measure_walltime_tag = "nv/{}/walltime".format(name) + summary = next(filter(lambda s: s["tag"] == measure_walltime_tag, + summaries), + None) + if not summary: + continue + + walltime_data = next(filter(lambda d: d["name"] == "value", summary["data"])) + assert(walltime_data["type"] == "float64") + walltime = walltime_data["value"] + walltime = float(walltime) + times[name] = walltime if walltime else 0. 
return times @@ -130,8 +135,9 @@ def consume_benchmark(bench, file_root): axes_out = {} axes = bench["axes"] if axes: - for axis_name, axis in axes.items(): + for axis in axes: values_out = {} + axis_name = axis["name"] axis_type = axis["type"] for value in axis["values"]: if axis_type == "type": @@ -144,7 +150,8 @@ def consume_benchmark(bench, file_root): states_out = {} bench_measures = init_measures() - for state_name, state in bench["states"].items(): + for state in bench["states"]: + state_name = state["name"] # Get walltimes for each measurement: state_measures = get_measures(state) state_out = {} @@ -157,8 +164,9 @@ def consume_benchmark(bench, file_root): # Update the axis measurements: axis_values = state["axis_values"] if axis_values: - for axis_name, value_data in axis_values.items(): - value = format_axis_value(value_data["value"], value_data["type"]) + for axis_value in axis_values: + axis_name = axis_value["name"] + value = format_axis_value(axis_value["value"], axis_value["type"]) merge_measures(axes_out[axis_name][value]["measures"], state_measures) bench_out["axes"] = axes_out