mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Dump frequencies
This commit is contained in:
committed by
Oleksandr Pavlyk
parent
dc59f98ecd
commit
a487a38895
@@ -86,6 +86,7 @@ void measure_cold_base::initialize()
|
||||
m_dynamic_throttle_recovery_delay = m_throttle_recovery_delay;
|
||||
m_throttle_discard_count = 0;
|
||||
|
||||
m_sm_clock_rates.clear();
|
||||
m_cuda_times.clear();
|
||||
m_cpu_times.clear();
|
||||
|
||||
@@ -140,6 +141,7 @@ void measure_cold_base::record_measurements()
|
||||
}
|
||||
m_throttle_discard_count = 0;
|
||||
|
||||
m_sm_clock_rates.push_back(current_clock_rate);
|
||||
m_sm_clock_rate_accumulator += current_clock_rate;
|
||||
}
|
||||
|
||||
@@ -445,6 +447,7 @@ void measure_cold_base::generate_summaries()
|
||||
m_total_samples));
|
||||
|
||||
printer.process_bulk_data(m_state, "nv/cold/sample_times", "sample_times", m_cuda_times);
|
||||
printer.process_bulk_data(m_state, "nv/cold/sample_freqs", "sample_freqs", m_sm_clock_rates);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -122,6 +122,7 @@ protected:
|
||||
nvbench::float64_t m_max_cpu_time{};
|
||||
nvbench::float64_t m_total_cpu_time{};
|
||||
|
||||
std::vector<nvbench::float64_t> m_sm_clock_rates{};
|
||||
nvbench::float64_t m_sm_clock_rate_accumulator{};
|
||||
|
||||
std::vector<nvbench::float64_t> m_cuda_times;
|
||||
|
||||
@@ -241,6 +241,83 @@ void json_printer::do_process_bulk_data_float64(state &state,
|
||||
fmt::format("Wrote '{}' in {:>6.3f}ms", result_path.string(), timer.get_duration() * 1000));
|
||||
}
|
||||
} // end hint == sample_times
|
||||
|
||||
// Handle bulk data tagged with the "sample_freqs" hint: write the values to
// `<m_stream_name>-freqs-bin/<N>.bin` as little-endian float32 and attach a
// summary describing that file to `state`. Failures while creating the
// directory or writing the file are caught and reported through the
// benchmark's printer (if any) as a warning; they do not propagate.
if (hint == "sample_freqs")
{
  nvbench::cpu_timer timer;
  timer.start();

  fs::path result_path{m_stream_name + "-freqs-bin/"};
  try
  {
    if (!fs::exists(result_path))
    {
      if (!fs::create_directory(result_path))
      {
        // Pass the message as the format string so that the path is actually
        // substituted into the '{}' placeholder.
        NVBENCH_THROW(std::runtime_error,
                      "Failed to create result directory '{}'.",
                      result_path.string());
      }
    }
    else if (!fs::is_directory(result_path))
    {
      NVBENCH_THROW(std::runtime_error,
                    "'{}' exists and is not a directory.",
                    result_path.string());
    }

    // Each call gets its own numbered file inside the directory.
    const auto file_id = m_num_jsonbin_freq_files++;
    result_path /= fmt::format("{:d}.bin", file_id);

    std::ofstream out;
    // Make stream failures throw so they land in the catch block below.
    out.exceptions(out.exceptions() | std::ios::failbit | std::ios::badbit);
    out.open(result_path, std::ios::binary | std::ios::out);

    // FIXME: SLOW -- Writing the binary file, 4 bytes at a time...
    // There are a lot of optimizations that could be done here if this ends
    // up being a noticeable bottleneck.
    for (auto value64 : data)
    {
      const auto value32 = static_cast<nvbench::float32_t>(value64);
      char buffer[4];
      std::memcpy(buffer, &value32, 4);
      // the c++17 implementation of is_little_endian isn't constexpr, but
      // all supported compilers optimize this branch as if it were.
      if (!is_little_endian())
      {
        // Reverse the byte order so the file is always little-endian.
        using std::swap;
        swap(buffer[0], buffer[3]);
        swap(buffer[1], buffer[2]);
      }
      out.write(buffer, 4);
    }
  }
  catch (const std::exception &e)
  {
    if (auto printer_opt_ref = state.get_benchmark().get_printer(); printer_opt_ref.has_value())
    {
      auto &printer = printer_opt_ref.value().get();
      printer.log(
        nvbench::log_level::warn,
        fmt::format("Error writing {} ({}) to {}: {}", tag, hint, result_path.string(), e.what()));
    }
  } // end catch

  // NOTE(review): the summary and the "Wrote" log below are emitted even when
  // the write above failed; kept as-is to preserve existing behavior.
  auto &summ = state.add_summary(fmt::format("nv/json/freqs-bin:{}", tag));
  summ.set_string("name", "Samples Frequencies File");
  summ.set_string("hint", "file/sample_freqs");
  summ.set_string("description",
                  "Binary file containing sample frequencies as little-endian "
                  "float32.");
  summ.set_string("filename", result_path.string());
  summ.set_int64("size", static_cast<nvbench::int64_t>(data.size()));
  summ.set_string("hide", "Not needed in table.");

  timer.stop();
  if (auto printer_opt_ref = state.get_benchmark().get_printer(); printer_opt_ref.has_value())
  {
    auto &printer = printer_opt_ref.value().get();
    printer.log(
      nvbench::log_level::info,
      fmt::format("Wrote '{}' in {:>6.3f}ms", result_path.string(), timer.get_duration() * 1000));
  }
} // end hint == sample_freqs
|
||||
}
|
||||
|
||||
static void add_devices_section(nlohmann::ordered_json &root)
|
||||
|
||||
@@ -73,6 +73,7 @@ protected:
|
||||
|
||||
bool m_enable_binary_output{false};
|
||||
std::size_t m_num_jsonbin_files{};
|
||||
std::size_t m_num_jsonbin_freq_files{};
|
||||
|
||||
std::vector<std::string> m_argv;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user