mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
* json_printer.cu changed to use a write-out buffer of 4KB. json_printer::do_process_bulk_data_float64 used to write out one float32 value at a time. This PR introduces a 4KB buffer that is filled with values until full, and then written out. The 4KB value aligns with the system memory page size and seems appropriate for the relatively small data sizes of duration measurements. * Add explicit static cast from std::size_t to std::streamsize. The explicit cast avoids a narrowing error. * Factor out writing the array to a binary file into a standalone function. This function is templated on the buffer size. The function can be reused to also write out frequency samples in the future.
528 lines
17 KiB
Plaintext
528 lines
17 KiB
Plaintext
/*
|
|
* Copyright 2021 NVIDIA Corporation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
|
* (the "License"); you may not use this file except in compliance with
|
|
* the License.
|
|
*
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <nvbench/axes_metadata.cuh>
|
|
#include <nvbench/benchmark_base.cuh>
|
|
#include <nvbench/config.cuh>
|
|
#include <nvbench/detail/throw.cuh>
|
|
#include <nvbench/device_info.cuh>
|
|
#include <nvbench/device_manager.cuh>
|
|
#include <nvbench/git_revision.cuh>
|
|
#include <nvbench/json_printer.cuh>
|
|
#include <nvbench/state.cuh>
|
|
#include <nvbench/summary.cuh>
|
|
#include <nvbench/version.cuh>
|
|
|
|
#include <nlohmann/json.hpp>
|
|
|
|
#include <fmt/format.h>
|
|
|
|
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iterator>
#include <ostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
#if __has_include(<filesystem>)
|
|
#include <filesystem>
|
|
namespace fs = std::filesystem;
|
|
#elif __has_include(<experimental/filesystem>)
|
|
#include <experimental/filesystem>
|
|
namespace fs = std::experimental::filesystem;
|
|
#else
|
|
static_assert(false, "No <filesystem> or <experimental/filesystem> found.");
|
|
#endif
|
|
|
|
#if NVBENCH_CPP_DIALECT >= 2020
|
|
#include <bit>
|
|
#endif
|
|
|
|
namespace
|
|
{
|
|
|
|
#if NVBENCH_CPP_DIALECT >= 2020
|
|
constexpr bool is_little_endian() noexcept { return std::endian::native == std::endian::little; }
|
|
#else
|
|
bool is_little_endian() noexcept
|
|
{
|
|
const nvbench::uint32_t word = {0xBadDecaf};
|
|
nvbench::uint8_t bytes[4];
|
|
std::memcpy(bytes, &word, 4);
|
|
return bytes[0] == 0xaf;
|
|
}
|
|
#endif
|
|
|
|
template <typename JsonNode>
|
|
void write_named_values(JsonNode &node, const nvbench::named_values &values)
|
|
{
|
|
const auto value_names = values.get_names();
|
|
for (const auto &value_name : value_names)
|
|
{
|
|
auto &value = node.emplace_back();
|
|
value["name"] = value_name;
|
|
|
|
const auto type = values.get_type(value_name);
|
|
switch (type)
|
|
{
|
|
case nvbench::named_values::type::int64:
|
|
value["type"] = "int64";
|
|
// Write as a string; JSON encodes all numbers as double-precision
|
|
// floats, which would truncate int64s.
|
|
value["value"] = fmt::to_string(values.get_int64(value_name));
|
|
break;
|
|
|
|
case nvbench::named_values::type::float64:
|
|
value["type"] = "float64";
|
|
// Write as a string for consistency with int64.
|
|
value["value"] = fmt::to_string(values.get_float64(value_name));
|
|
break;
|
|
|
|
case nvbench::named_values::type::string:
|
|
value["type"] = "string";
|
|
value["value"] = values.get_string(value_name);
|
|
break;
|
|
|
|
default:
|
|
NVBENCH_THROW(std::runtime_error, "{}", "Unrecognized value type.");
|
|
} // end switch (value type)
|
|
} // end foreach value name
|
|
}
|
|
|
|
template <std::size_t buffer_nbytes>
|
|
void write_out_values(std::ofstream &out, const std::vector<nvbench::float64_t> &data)
|
|
{
|
|
static constexpr std::size_t value_nbytes = sizeof(nvbench::float32_t);
|
|
static_assert(buffer_nbytes % value_nbytes == 0);
|
|
|
|
alignas(alignof(nvbench::float32_t)) char buffer[buffer_nbytes];
|
|
std::size_t bytes_in_buffer = 0;
|
|
|
|
for (auto value64 : data)
|
|
{
|
|
const auto value32 = static_cast<nvbench::float32_t>(value64);
|
|
auto value_subbuffer = &buffer[bytes_in_buffer];
|
|
std::memcpy(value_subbuffer, &value32, value_nbytes);
|
|
|
|
// the c++17 implementation of is_little_endian isn't constexpr, but
|
|
// all supported compilers optimize this branch as if it were.
|
|
if (!is_little_endian())
|
|
{
|
|
std::swap(value_subbuffer[0], value_subbuffer[3]);
|
|
std::swap(value_subbuffer[1], value_subbuffer[2]);
|
|
}
|
|
bytes_in_buffer += value_nbytes;
|
|
|
|
// if buffer is full, write it out and wrap around
|
|
if (bytes_in_buffer == buffer_nbytes)
|
|
{
|
|
out.write(buffer, static_cast<std::streamsize>(buffer_nbytes));
|
|
bytes_in_buffer = 0;
|
|
}
|
|
} // end of foreach value64 in data
|
|
|
|
if (bytes_in_buffer)
|
|
{
|
|
out.write(buffer, static_cast<std::streamsize>(bytes_in_buffer));
|
|
bytes_in_buffer = 0;
|
|
}
|
|
}
|
|
|
|
} // end namespace
|
|
|
|
namespace nvbench
|
|
{
|
|
|
|
// Returns the version of the JSON file layout written by this printer.
//
// Keep the returned value in sync with `file_version` in
// scripts/nvbench_json/version.py.
//
// The three components follow semantic versioning:
//   major -- backwards incompatible changes
//   minor -- backwards compatible additions
//   patch -- backwards compatible bugfixes/patches
json_printer::version_t json_printer::get_json_file_version()
{
  return version_t{1, 0, 0};
}
|
|
|
|
std::string json_printer::version_t::get_string() const
|
|
{
|
|
return fmt::format("{}.{}.{}", this->major, this->minor, this->patch);
|
|
}
|
|
|
|
// Handles a bulk float64 data set reported for `state`.
//
// The call is always forwarded to printer_base first. If binary output is
// enabled and `hint` is "sample_times", the samples are additionally written
// as little-endian float32 values to "<m_stream_name>-bin/<N>.bin", where N is
// the running counter m_num_jsonbin_files. The directory is created on demand.
//
// Any std::exception raised while preparing the directory or writing the file
// is caught and reported as a warning through the benchmark's printer (when
// one is set); it is not propagated to the caller.
//
// A summary tagged "nv/json/bin:<tag>" describing the file is attached to
// `state`, and the elapsed time is logged at info level.
//
// Parameters:
//   state -- state the data belongs to; receives the summary.
//   tag   -- identifier of the data set, used in the summary tag and messages.
//   hint  -- kind of data; only "sample_times" triggers binary output.
//   data  -- the samples.
void json_printer::do_process_bulk_data_float64(state &state,
                                                const std::string &tag,
                                                const std::string &hint,
                                                const std::vector<nvbench::float64_t> &data)
{
  printer_base::do_process_bulk_data_float64(state, tag, hint, data);

  if (!m_enable_binary_output)
  {
    return;
  }

  if (hint != "sample_times")
  {
    return;
  }

  nvbench::cpu_timer timer;
  timer.start();

  fs::path result_path{m_stream_name + "-bin/"};
  try
  {
    if (!fs::exists(result_path))
    {
      if (!fs::create_directory(result_path))
      {
        // The message itself is the format string so that the path is
        // actually substituted into it.
        NVBENCH_THROW(std::runtime_error,
                      "Failed to create result directory '{}'.",
                      result_path.string());
      }
    }
    else if (!fs::is_directory(result_path))
    {
      NVBENCH_THROW(std::runtime_error,
                    "'{}' exists and is not a directory.",
                    result_path.string());
    }

    const auto file_id = m_num_jsonbin_files++;
    result_path /= fmt::format("{:d}.bin", file_id);

    // Make stream failures throw so they end up in the catch block below.
    std::ofstream out;
    out.exceptions(out.exceptions() | std::ios::failbit | std::ios::badbit);
    out.open(result_path, std::ios::binary | std::ios::out);

    // choose buffer to be block size of modern SSD
    // see: https://github.com/NVIDIA/nvbench/issues/255
    constexpr std::size_t buffer_nbytes = 4096;
    write_out_values<buffer_nbytes>(out, data);
  }
  catch (const std::exception &e)
  {
    if (auto printer_opt_ref = state.get_benchmark().get_printer(); printer_opt_ref.has_value())
    {
      auto &printer = printer_opt_ref.value().get();
      printer.log(
        nvbench::log_level::warn,
        fmt::format("Error writing {} ({}) to {}: {}", tag, hint, result_path.string(), e.what()));
    }
  } // end catch

  // NOTE(review): the summary and the "Wrote" message below are emitted even
  // when the write above failed; confirm whether consumers rely on that.
  auto &summ = state.add_summary(fmt::format("nv/json/bin:{}", tag));
  summ.set_string("name", "Samples Times File");
  summ.set_string("hint", "file/sample_times");
  summ.set_string("description",
                  "Binary file containing sample times as little-endian "
                  "float32.");
  summ.set_string("filename", result_path.string());
  summ.set_int64("size", static_cast<nvbench::int64_t>(data.size()));
  summ.set_string("hide", "Not needed in table.");

  timer.stop();
  if (auto printer_opt_ref = state.get_benchmark().get_printer(); printer_opt_ref.has_value())
  {
    auto &printer = printer_opt_ref.value().get();
    printer.log(
      nvbench::log_level::info,
      fmt::format("Wrote '{}' in {:>6.3f}ms", result_path.string(), timer.get_duration() * 1000));
  }
}
|
|
|
|
// Fills root["devices"] with one object per device reported by
// nvbench::device_manager. Each object records the values returned by the
// device_info getters, keyed by the property name, in the order listed below.
static void add_devices_section(nlohmann::ordered_json &root)
{
  auto &device_list = root["devices"];

  for (const auto &info : nvbench::device_manager::get().get_devices())
  {
    auto &entry = device_list.emplace_back();

    // Identification
    entry["id"]          = info.get_id();
    entry["name"]        = info.get_name();
    entry["sm_version"]  = info.get_sm_version();
    entry["ptx_version"] = info.get_ptx_version();

    // Compute resources
    entry["sm_default_clock_rate"] = info.get_sm_default_clock_rate();
    entry["number_of_sms"]         = info.get_number_of_sms();
    entry["max_blocks_per_sm"]     = info.get_max_blocks_per_sm();
    entry["max_threads_per_sm"]    = info.get_max_threads_per_sm();
    entry["max_threads_per_block"] = info.get_max_threads_per_block();
    entry["registers_per_sm"]      = info.get_registers_per_sm();
    entry["registers_per_block"]   = info.get_registers_per_block();

    // Memory
    entry["global_memory_size"] = info.get_global_memory_size();
    entry["global_memory_bus_peak_clock_rate"] =
      info.get_global_memory_bus_peak_clock_rate();
    entry["global_memory_bus_width"]     = info.get_global_memory_bus_width();
    entry["global_memory_bus_bandwidth"] = info.get_global_memory_bus_bandwidth();
    entry["l2_cache_size"]               = info.get_l2_cache_size();
    entry["shared_memory_per_sm"]        = info.get_shared_memory_per_sm();
    entry["shared_memory_per_block"]     = info.get_shared_memory_per_block();
    entry["ecc_state"]                   = info.get_ecc_state();
  }
}
|
|
|
|
void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
|
{
|
|
nlohmann::ordered_json root;
|
|
|
|
{
|
|
auto &metadata = root["meta"];
|
|
|
|
{
|
|
auto &argv = metadata["argv"];
|
|
for (const auto &arg : m_argv)
|
|
{
|
|
argv.push_back(arg);
|
|
}
|
|
} // "argv"
|
|
|
|
{
|
|
auto &version = metadata["version"];
|
|
|
|
{
|
|
const auto version_info = json_printer::get_json_file_version();
|
|
auto &json_version = version["json"];
|
|
|
|
json_version["major"] = version_info.major;
|
|
json_version["minor"] = version_info.minor;
|
|
json_version["patch"] = version_info.patch;
|
|
json_version["string"] = version_info.get_string();
|
|
} // "json"
|
|
|
|
{
|
|
auto &nvb_version = version["nvbench"];
|
|
|
|
nvb_version["major"] = NVBENCH_VERSION_MAJOR;
|
|
nvb_version["minor"] = NVBENCH_VERSION_MINOR;
|
|
nvb_version["patch"] = NVBENCH_VERSION_PATCH;
|
|
nvb_version["string"] = fmt::format("{}.{}.{}",
|
|
NVBENCH_VERSION_MAJOR,
|
|
NVBENCH_VERSION_MINOR,
|
|
NVBENCH_VERSION_PATCH);
|
|
|
|
nvb_version["git_branch"] = NVBENCH_GIT_BRANCH;
|
|
nvb_version["git_sha"] = NVBENCH_GIT_SHA1;
|
|
nvb_version["git_version"] = NVBENCH_GIT_VERSION;
|
|
nvb_version["git_is_dirty"] =
|
|
#ifdef NVBENCH_GIT_IS_DIRTY
|
|
true;
|
|
#else
|
|
false;
|
|
#endif
|
|
} // "nvbench"
|
|
} // "version"
|
|
} // "meta"
|
|
|
|
add_devices_section(root);
|
|
|
|
{
|
|
auto &benchmarks = root["benchmarks"];
|
|
for (const auto &bench_ptr : benches)
|
|
{
|
|
const auto bench_index = benchmarks.size();
|
|
auto &bench = benchmarks.emplace_back();
|
|
|
|
bench["name"] = bench_ptr->get_name();
|
|
bench["index"] = bench_index;
|
|
|
|
bench["min_samples"] = bench_ptr->get_min_samples();
|
|
bench["skip_time"] = bench_ptr->get_skip_time();
|
|
bench["timeout"] = bench_ptr->get_timeout();
|
|
|
|
auto &devices = bench["devices"];
|
|
for (const auto &dev_info : bench_ptr->get_devices())
|
|
{
|
|
devices.push_back(dev_info.get_id());
|
|
}
|
|
|
|
auto &axes = bench["axes"];
|
|
for (const auto &axis_ptr : bench_ptr->get_axes().get_axes())
|
|
{
|
|
auto &axis = axes.emplace_back();
|
|
|
|
axis["name"] = axis_ptr->get_name();
|
|
axis["type"] = axis_ptr->get_type_as_string();
|
|
axis["flags"] = axis_ptr->get_flags_as_string();
|
|
|
|
auto &values = axis["values"];
|
|
const auto axis_size = axis_ptr->get_size();
|
|
for (std::size_t i = 0; i < axis_size; ++i)
|
|
{
|
|
auto &value = values.emplace_back();
|
|
value["input_string"] = axis_ptr->get_input_string(i);
|
|
value["description"] = axis_ptr->get_description(i);
|
|
|
|
switch (axis_ptr->get_type())
|
|
{
|
|
case nvbench::axis_type::type:
|
|
value["is_active"] = static_cast<type_axis &>(*axis_ptr).get_is_active(i);
|
|
break;
|
|
|
|
case nvbench::axis_type::int64:
|
|
value["value"] = static_cast<int64_axis &>(*axis_ptr).get_value(i);
|
|
break;
|
|
|
|
case nvbench::axis_type::float64:
|
|
value["value"] = static_cast<float64_axis &>(*axis_ptr).get_value(i);
|
|
break;
|
|
|
|
case nvbench::axis_type::string:
|
|
value["value"] = static_cast<string_axis &>(*axis_ptr).get_value(i);
|
|
break;
|
|
default:
|
|
break;
|
|
} // end switch (axis type)
|
|
} // end foreach axis value
|
|
} // end foreach axis
|
|
|
|
auto &states = bench["states"];
|
|
for (const auto &exec_state : bench_ptr->get_states())
|
|
{
|
|
auto &st = states.emplace_back();
|
|
|
|
st["name"] = exec_state.get_axis_values_as_string();
|
|
|
|
st["min_samples"] = exec_state.get_min_samples();
|
|
st["skip_time"] = exec_state.get_skip_time();
|
|
st["timeout"] = exec_state.get_timeout();
|
|
|
|
st["device"] = exec_state.get_device()->get_id();
|
|
st["type_config_index"] = exec_state.get_type_config_index();
|
|
|
|
// TODO I'd like to replace this with:
|
|
// [ {"name" : <axis name>, "index": <value_index>}, ...]
|
|
// but it would take some refactoring in the data structures to get
|
|
// that information through.
|
|
::write_named_values(st["axis_values"], exec_state.get_axis_values());
|
|
|
|
auto &summaries = st["summaries"];
|
|
for (const auto &exec_summ : exec_state.get_summaries())
|
|
{
|
|
auto &summ = summaries.emplace_back();
|
|
summ["tag"] = exec_summ.get_tag();
|
|
|
|
// Write out the expected values as simple key/value pairs
|
|
nvbench::named_values summary_values = exec_summ;
|
|
if (summary_values.has_value("name"))
|
|
{
|
|
summ["name"] = summary_values.get_string("name");
|
|
summary_values.remove_value("name");
|
|
}
|
|
if (summary_values.has_value("description"))
|
|
{
|
|
summ["description"] = summary_values.get_string("description");
|
|
summary_values.remove_value("description");
|
|
}
|
|
if (summary_values.has_value("hint"))
|
|
{
|
|
summ["hint"] = summary_values.get_string("hint");
|
|
summary_values.remove_value("hint");
|
|
}
|
|
if (summary_values.has_value("hide"))
|
|
{
|
|
summ["hide"] = summary_values.get_string("hide");
|
|
summary_values.remove_value("hide");
|
|
}
|
|
|
|
// Write any additional values generically in
|
|
// ["data"] = [{name,type,value}, ...]:
|
|
if (summary_values.get_size() != 0)
|
|
{
|
|
::write_named_values(summ["data"], summary_values);
|
|
}
|
|
}
|
|
|
|
st["is_skipped"] = exec_state.is_skipped();
|
|
if (exec_state.is_skipped())
|
|
{
|
|
st["skip_reason"] = exec_state.get_skip_reason();
|
|
continue;
|
|
}
|
|
} // end foreach exec_state
|
|
} // end foreach benchmark
|
|
} // "benchmarks"
|
|
|
|
m_ostream << root.dump(2) << "\n";
|
|
}
|
|
|
|
void json_printer::do_print_benchmark_list(const benchmark_vector &benches)
|
|
{
|
|
if (benches.empty())
|
|
{
|
|
return;
|
|
}
|
|
|
|
nlohmann::ordered_json root;
|
|
auto &benchmarks = root["benchmarks"];
|
|
|
|
for (const auto &bench_ptr : benches)
|
|
{
|
|
const auto bench_index = benchmarks.size();
|
|
auto &bench = benchmarks.emplace_back();
|
|
|
|
bench["name"] = bench_ptr->get_name();
|
|
bench["index"] = bench_index;
|
|
|
|
// We have to ensure that the axes are represented as an array, not an
|
|
// nil object when there are no axes.
|
|
auto &axes = bench["axes"] = nlohmann::json::array();
|
|
|
|
for (const auto &axis_ptr : bench_ptr->get_axes().get_axes())
|
|
{
|
|
auto &axis = axes.emplace_back();
|
|
|
|
axis["name"] = axis_ptr->get_name();
|
|
axis["type"] = axis_ptr->get_type_as_string();
|
|
axis["flags"] = axis_ptr->get_flags_as_string();
|
|
|
|
auto &values = axis["values"];
|
|
const auto axis_size = axis_ptr->get_size();
|
|
for (std::size_t i = 0; i < axis_size; ++i)
|
|
{
|
|
auto &value = values.emplace_back();
|
|
value["input_string"] = axis_ptr->get_input_string(i);
|
|
value["description"] = axis_ptr->get_description(i);
|
|
|
|
switch (axis_ptr->get_type())
|
|
{
|
|
case nvbench::axis_type::int64:
|
|
value["value"] = static_cast<int64_axis &>(*axis_ptr).get_value(i);
|
|
break;
|
|
|
|
case nvbench::axis_type::float64:
|
|
value["value"] = static_cast<float64_axis &>(*axis_ptr).get_value(i);
|
|
break;
|
|
|
|
case nvbench::axis_type::string:
|
|
value["value"] = static_cast<string_axis &>(*axis_ptr).get_value(i);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
} // end switch (axis type)
|
|
} // end foreach axis value
|
|
}
|
|
} // end foreach bench
|
|
|
|
m_ostream << root.dump(2) << "\n";
|
|
}
|
|
|
|
void json_printer::print_devices_json()
|
|
{
|
|
nlohmann::ordered_json root;
|
|
add_devices_section(root);
|
|
m_ostream << root.dump(2) << "\n";
|
|
}
|
|
|
|
} // namespace nvbench
|