mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-04-20 06:48:53 +00:00
Merge pull request #70 from allisonvacanti/walltime_reports
Python / JSON updates
This commit is contained in:
@@ -63,6 +63,11 @@ else()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# GCC-specific flags
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL GNU)
|
||||
target_link_libraries(nvbench.build_interface INTERFACE stdc++fs)
|
||||
endif()
|
||||
|
||||
# CUDA-specific flags
|
||||
target_compile_options(nvbench.build_interface INTERFACE
|
||||
$<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Xcudafe=--display_error_number>
|
||||
|
||||
@@ -32,3 +32,21 @@
|
||||
#define NVBENCH_MSVC_PUSH_DISABLE_WARNING(code)
|
||||
#define NVBENCH_MSVC_POP_WARNING()
|
||||
#endif
|
||||
|
||||
// MSVC does not define __cplusplus correctly. _MSVC_LANG is used instead.
|
||||
#ifdef _MSVC_LANG
|
||||
#define NVBENCH_CPLUSPLUS _MSVC_LANG
|
||||
#else
|
||||
#define NVBENCH_CPLUSPLUS __cplusplus
|
||||
#endif
|
||||
|
||||
// Detect current dialect:
|
||||
#if NVBENCH_CPLUSPLUS < 201703L
|
||||
#error "NVBench requires a C++17 compiler."
|
||||
#elif NVBENCH_CPLUSPLUS < 202002L
|
||||
#define NVBENCH_CPP_DIALECT 2017
|
||||
#elif NVBENCH_CPLUSPLUS == 202002L
|
||||
#define NVBENCH_CPP_DIALECT 2020
|
||||
#elif NVBENCH_CPLUSPLUS > 202002L // unknown, but is higher than C++20.
|
||||
#define NVBENCH_CPP_DIALECT 2023
|
||||
#endif
|
||||
|
||||
@@ -116,10 +116,10 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
const std::string &key = summ.get_name();
|
||||
const std::string &header = summ.has_value("short_name")
|
||||
? summ.get_string("short_name")
|
||||
: key;
|
||||
const std::string &tag = summ.get_tag();
|
||||
const std::string &header = summ.has_value("name")
|
||||
? summ.get_string("name")
|
||||
: tag;
|
||||
|
||||
const std::string hint = summ.has_value("hint")
|
||||
? summ.get_string("hint")
|
||||
@@ -127,31 +127,31 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
std::string value = std::visit(format_visitor, summ.get_value("value"));
|
||||
if (hint == "duration")
|
||||
{
|
||||
table.add_cell(row, key, header + " (sec)", std::move(value));
|
||||
table.add_cell(row, tag, header + " (sec)", std::move(value));
|
||||
}
|
||||
else if (hint == "item_rate")
|
||||
{
|
||||
table.add_cell(row, key, header + " (elem/sec)", std::move(value));
|
||||
table.add_cell(row, tag, header + " (elem/sec)", std::move(value));
|
||||
}
|
||||
else if (hint == "bytes")
|
||||
{
|
||||
table.add_cell(row, key, header + " (bytes)", std::move(value));
|
||||
table.add_cell(row, tag, header + " (bytes)", std::move(value));
|
||||
}
|
||||
else if (hint == "byte_rate")
|
||||
{
|
||||
table.add_cell(row, key, header + " (bytes/sec)", std::move(value));
|
||||
table.add_cell(row, tag, header + " (bytes/sec)", std::move(value));
|
||||
}
|
||||
else if (hint == "sample_size")
|
||||
{
|
||||
table.add_cell(row, key, header, std::move(value));
|
||||
table.add_cell(row, tag, header, std::move(value));
|
||||
}
|
||||
else if (hint == "percentage")
|
||||
{
|
||||
table.add_cell(row, key, header, std::move(value));
|
||||
table.add_cell(row, tag, header, std::move(value));
|
||||
}
|
||||
else
|
||||
{
|
||||
table.add_cell(row, key, header, std::move(value));
|
||||
table.add_cell(row, tag, header, std::move(value));
|
||||
}
|
||||
}
|
||||
row++;
|
||||
|
||||
@@ -175,53 +175,50 @@ void measure_cold_base::generate_summaries()
|
||||
{
|
||||
const auto d_samples = static_cast<double>(m_total_samples);
|
||||
{
|
||||
auto &summ = m_state.add_summary("Number of Samples (Cold)");
|
||||
auto &summ = m_state.add_summary("nv/cold/sample_size");
|
||||
summ.set_string("name", "Samples");
|
||||
summ.set_string("hint", "sample_size");
|
||||
summ.set_string("short_name", "Samples");
|
||||
summ.set_string("description",
|
||||
"Number of kernel executions in cold time measurements.");
|
||||
summ.set_string("description", "Number of isolated kernel executions");
|
||||
summ.set_int64("value", m_total_samples);
|
||||
}
|
||||
|
||||
const auto avg_cpu_time = m_total_cpu_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average CPU Time (Cold)");
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/mean");
|
||||
summ.set_string("name", "CPU Time");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("short_name", "CPU Time");
|
||||
summ.set_string("description",
|
||||
"Average isolated kernel execution time observed "
|
||||
"from host.");
|
||||
"Mean isolated kernel execution time "
|
||||
"(measured on host CPU)");
|
||||
summ.set_float64("value", avg_cpu_time);
|
||||
}
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("CPU Relative Standard Deviation (Cold)");
|
||||
auto &summ = m_state.add_summary("nv/cold/time/cpu/stdev/relative");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("short_name", "Noise");
|
||||
summ.set_string("description",
|
||||
"Relative standard deviation of the cold CPU execution "
|
||||
"time measurements.");
|
||||
"Relative standard deviation of isolated CPU times");
|
||||
summ.set_float64("value", m_cpu_noise);
|
||||
}
|
||||
|
||||
const auto avg_cuda_time = m_total_cuda_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average GPU Time (Cold)");
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/mean");
|
||||
summ.set_string("name", "GPU Time");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("short_name", "GPU Time");
|
||||
summ.set_string("description",
|
||||
"Average isolated kernel execution time as measured "
|
||||
"by CUDA events.");
|
||||
"Mean isolated kernel execution time "
|
||||
"(measured with CUDA events)");
|
||||
summ.set_float64("value", avg_cuda_time);
|
||||
}
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("GPU Relative Standard Deviation (Cold)");
|
||||
auto &summ = m_state.add_summary("nv/cold/time/gpu/stdev/relative");
|
||||
summ.set_string("name", "Noise");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("short_name", "Noise");
|
||||
summ.set_string("description",
|
||||
"Relative standard deviation of the cold GPU execution "
|
||||
"time measurements.");
|
||||
"Relative standard deviation of isolated GPU times");
|
||||
summ.set_float64("value",
|
||||
m_noise_tracker.empty()
|
||||
? std::numeric_limits<nvbench::float64_t>::infinity()
|
||||
@@ -230,11 +227,11 @@ void measure_cold_base::generate_summaries()
|
||||
|
||||
if (const auto items = m_state.get_element_count(); items != 0)
|
||||
{
|
||||
auto &summ = m_state.add_summary("Element Throughput");
|
||||
auto &summ = m_state.add_summary("nv/cold/bw/item_rate");
|
||||
summ.set_string("name", "Elem/s");
|
||||
summ.set_string("hint", "item_rate");
|
||||
summ.set_string("short_name", "Elem/s");
|
||||
summ.set_string("description",
|
||||
"Number of input elements handled per second.");
|
||||
"Number of input elements processed per second");
|
||||
summ.set_float64("value", static_cast<double>(items) / avg_cuda_time);
|
||||
}
|
||||
|
||||
@@ -242,12 +239,12 @@ void measure_cold_base::generate_summaries()
|
||||
{
|
||||
const auto avg_used_gmem_bw = static_cast<double>(bytes) / avg_cuda_time;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average Global Memory Throughput");
|
||||
auto &summ = m_state.add_summary("nv/cold/bw/global/bytes_per_second");
|
||||
summ.set_string("name", "GlobalMem BW");
|
||||
summ.set_string("hint", "byte_rate");
|
||||
summ.set_string("short_name", "GlobalMem BW");
|
||||
summ.set_string("description",
|
||||
"Number of bytes read/written per second to the CUDA "
|
||||
"device's global memory.");
|
||||
"device's global memory");
|
||||
summ.set_float64("value", avg_used_gmem_bw);
|
||||
}
|
||||
|
||||
@@ -255,14 +252,23 @@ void measure_cold_base::generate_summaries()
|
||||
const auto peak_gmem_bw = static_cast<double>(
|
||||
m_state.get_device()->get_global_memory_bus_bandwidth());
|
||||
|
||||
auto &summ = m_state.add_summary("Percent Peak Global Memory Throughput");
|
||||
auto &summ = m_state.add_summary("nv/cold/bw/global/utilization");
|
||||
summ.set_string("name", "BWUtil");
|
||||
summ.set_string("hint", "percentage");
|
||||
summ.set_string("short_name", "BWPeak");
|
||||
summ.set_string("description",
|
||||
"Global device memory throughput as a percentage of the "
|
||||
"device's peak bandwidth.");
|
||||
"Global device memory utilization as a percentage of the "
|
||||
"device's peak bandwidth");
|
||||
summ.set_float64("value", avg_used_gmem_bw / peak_gmem_bw);
|
||||
}
|
||||
} // bandwidth
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cold/walltime");
|
||||
summ.set_string("name", "Walltime");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Walltime used for isolated measurements");
|
||||
summ.set_float64("value", m_walltime_timer.get_duration());
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
|
||||
// Log if a printer exists:
|
||||
@@ -309,11 +315,17 @@ void measure_cold_base::generate_summaries()
|
||||
// Log to stdout:
|
||||
printer.log(nvbench::log_level::pass,
|
||||
fmt::format("Cold: {:0.6f}ms GPU, {:0.6f}ms CPU, {:0.2f}s "
|
||||
"total GPU, {}x",
|
||||
"total GPU, {:0.2f}s total wall, {}x ",
|
||||
avg_cuda_time * 1e3,
|
||||
avg_cpu_time * 1e3,
|
||||
m_total_cuda_time,
|
||||
m_walltime_timer.get_duration(),
|
||||
m_total_samples));
|
||||
|
||||
printer.process_bulk_data(m_state,
|
||||
"nv/cold/sample_times",
|
||||
"sample_times",
|
||||
m_cuda_times);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -53,11 +53,8 @@ struct metric_traits<metric_id::dram_peak_sustained_throughput>
|
||||
static constexpr const char *metric_name =
|
||||
"dram__throughput.avg.pct_of_peak_sustained_elapsed";
|
||||
|
||||
static constexpr const char *summary =
|
||||
"Peak Sustained Global Memory Throughput (HW)";
|
||||
|
||||
static constexpr const char *hint = "percentage";
|
||||
static constexpr const char *short_name = "HBWPeak";
|
||||
static constexpr const char *name = "HBWPeak";
|
||||
static constexpr const char *hint = "percentage";
|
||||
|
||||
static constexpr const char *description =
|
||||
"The utilization level of the device memory relative to the peak "
|
||||
@@ -77,9 +74,8 @@ struct metric_traits<metric_id::global_load_efficiency>
|
||||
static constexpr const char *metric_name =
|
||||
"smsp__sass_average_data_bytes_per_sector_mem_global_op_ld.pct";
|
||||
|
||||
static constexpr const char *summary = "Global Load Efficiency (HW)";
|
||||
static constexpr const char *hint = "percentage";
|
||||
static constexpr const char *short_name = "LoadEff";
|
||||
static constexpr const char *name = "LoadEff";
|
||||
static constexpr const char *hint = "percentage";
|
||||
|
||||
static constexpr const char *description =
|
||||
"Ratio of requested global memory load throughput to required global "
|
||||
@@ -99,9 +95,8 @@ struct metric_traits<metric_id::global_store_efficiency>
|
||||
static constexpr const char *metric_name =
|
||||
"smsp__sass_average_data_bytes_per_sector_mem_global_op_st.pct";
|
||||
|
||||
static constexpr const char *summary = "Global Store Efficiency (HW)";
|
||||
static constexpr const char *hint = "percentage";
|
||||
static constexpr const char *short_name = "StoreEff";
|
||||
static constexpr const char *name = "StoreEff";
|
||||
static constexpr const char *hint = "percentage";
|
||||
|
||||
static constexpr const char *description =
|
||||
"Ratio of requested global memory store throughput to required global "
|
||||
@@ -119,9 +114,8 @@ template <>
|
||||
struct metric_traits<metric_id::l1_hit_rate>
|
||||
{
|
||||
static constexpr const char *metric_name = "l1tex__t_sector_hit_rate.pct";
|
||||
static constexpr const char *summary = "L1 Cache Hit Rate (HW)";
|
||||
static constexpr const char *name = "L1HitRate";
|
||||
static constexpr const char *hint = "percentage";
|
||||
static constexpr const char *short_name = "L1HitRate";
|
||||
static constexpr const char *description = "Hit rate at L1 cache.";
|
||||
static constexpr double divider = 100.0;
|
||||
|
||||
@@ -135,9 +129,8 @@ template <>
|
||||
struct metric_traits<metric_id::l2_hit_rate>
|
||||
{
|
||||
static constexpr const char *metric_name = "lts__t_sector_hit_rate.pct";
|
||||
static constexpr const char *summary = "L2 Cache Hit Rate (HW)";
|
||||
static constexpr const char *name = "L2HitRate";
|
||||
static constexpr const char *hint = "percentage";
|
||||
static constexpr const char *short_name = "L2HitRate";
|
||||
static constexpr const char *description = "Hit rate at L2 cache.";
|
||||
static constexpr double divider = 100.0;
|
||||
|
||||
@@ -219,9 +212,10 @@ void gen_summary(std::size_t result_id,
|
||||
|
||||
if (metric::is_collected(m_state))
|
||||
{
|
||||
auto &summ = m_state.add_summary(metric::summary);
|
||||
auto &summ =
|
||||
m_state.add_summary(fmt::format("nv/cupti/{}", metric::metric_name));
|
||||
summ.set_string("name", metric::name);
|
||||
summ.set_string("hint", metric::hint);
|
||||
summ.set_string("short_name", metric::short_name);
|
||||
summ.set_string("description", metric::description);
|
||||
summ.set_float64("value", result[result_id++] / metric::divider);
|
||||
}
|
||||
@@ -247,6 +241,34 @@ void measure_cupti_base::generate_summaries()
|
||||
try
|
||||
{
|
||||
gen_summaries(m_state, m_cupti.get_counter_values());
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cupti/sample_size");
|
||||
summ.set_string("name", "Samples");
|
||||
summ.set_string("hint", "sample_size");
|
||||
summ.set_string("description", "Number of CUPTI kernel executions");
|
||||
summ.set_int64("value", m_total_samples);
|
||||
}
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/cupti/walltime");
|
||||
summ.set_string("name", "Walltime");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Walltime used for CUPTI measurements");
|
||||
summ.set_float64("value", m_walltime_timer.get_duration());
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
|
||||
// Log if a printer exists:
|
||||
if (auto printer_opt_ref = m_state.get_benchmark().get_printer();
|
||||
printer_opt_ref.has_value())
|
||||
{
|
||||
auto &printer = printer_opt_ref.value().get();
|
||||
printer.log(nvbench::log_level::pass,
|
||||
fmt::format("CUPTI: {:0.2f}s total wall, {}x",
|
||||
m_walltime_timer.get_duration(),
|
||||
m_total_samples));
|
||||
}
|
||||
}
|
||||
catch (const std::exception &ex)
|
||||
{
|
||||
|
||||
@@ -75,8 +75,11 @@ protected:
|
||||
|
||||
nvbench::launch m_launch;
|
||||
nvbench::detail::l2flush m_l2flush;
|
||||
nvbench::cpu_timer m_walltime_timer;
|
||||
|
||||
cupti_profiler m_cupti;
|
||||
|
||||
nvbench::int64_t m_total_samples{};
|
||||
};
|
||||
|
||||
struct measure_cupti_base::kernel_launch_timer
|
||||
@@ -129,6 +132,9 @@ private:
|
||||
// Run the kernel as many times as CUPTI requires.
|
||||
void run()
|
||||
{
|
||||
m_walltime_timer.start();
|
||||
m_total_samples = 0;
|
||||
|
||||
kernel_launch_timer timer(*this);
|
||||
|
||||
m_cupti.prepare_user_loop();
|
||||
@@ -136,9 +142,12 @@ private:
|
||||
do
|
||||
{
|
||||
m_kernel_launcher(m_launch, timer);
|
||||
++m_total_samples;
|
||||
} while (m_cupti.is_replay_required());
|
||||
|
||||
m_cupti.process_user_loop();
|
||||
|
||||
m_walltime_timer.stop();
|
||||
}
|
||||
|
||||
KernelLauncher &m_kernel_launcher;
|
||||
|
||||
@@ -47,7 +47,7 @@ measure_hot_base::measure_hot_base(state &exec_state)
|
||||
try
|
||||
{
|
||||
nvbench::int64_t cold_samples =
|
||||
m_state.get_summary("Number of Samples (Cold)").get_int64("value");
|
||||
m_state.get_summary("nv/cold/sample_size").get_int64("value");
|
||||
m_min_samples = std::max(m_min_samples, cold_samples);
|
||||
|
||||
// If the cold measurement ran successfully, disable skip_time. It'd just
|
||||
@@ -85,25 +85,33 @@ void measure_hot_base::check()
|
||||
|
||||
void measure_hot_base::generate_summaries()
|
||||
{
|
||||
const auto d_samples = static_cast<double>(m_total_samples);
|
||||
const auto d_samples = static_cast<double>(m_total_samples);
|
||||
{
|
||||
auto &summ = m_state.add_summary("nv/batch/sample_size");
|
||||
summ.set_string("name", "Samples");
|
||||
summ.set_string("hint", "sample_size");
|
||||
summ.set_string("description", "Number of batch kernel executions");
|
||||
summ.set_int64("value", m_total_samples);
|
||||
}
|
||||
|
||||
const auto avg_cuda_time = m_total_cuda_time / d_samples;
|
||||
{
|
||||
auto &summ = m_state.add_summary("Average GPU Time (Batch)");
|
||||
auto &summ = m_state.add_summary("nv/batch/time/gpu/mean");
|
||||
summ.set_string("name", "Batch GPU");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("short_name", "Batch GPU");
|
||||
summ.set_string("description",
|
||||
"Average back-to-back kernel execution time as measured "
|
||||
"by CUDA events.");
|
||||
"Mean batch kernel execution time "
|
||||
"(measured by CUDA events)");
|
||||
summ.set_float64("value", avg_cuda_time);
|
||||
}
|
||||
|
||||
{
|
||||
auto &summ = m_state.add_summary("Number of Samples (Batch)");
|
||||
summ.set_string("hint", "sample_size");
|
||||
summ.set_string("short_name", "Batch");
|
||||
summ.set_string("description",
|
||||
"Number of kernel executions in hot time measurements.");
|
||||
summ.set_int64("value", m_total_samples);
|
||||
auto &summ = m_state.add_summary("nv/batch/walltime");
|
||||
summ.set_string("name", "Walltime");
|
||||
summ.set_string("hint", "duration");
|
||||
summ.set_string("description", "Walltime used for batch measurements");
|
||||
summ.set_float64("value", m_walltime_timer.get_duration());
|
||||
summ.set_string("hide", "Hidden by default.");
|
||||
}
|
||||
|
||||
// Log if a printer exists:
|
||||
@@ -115,7 +123,7 @@ void measure_hot_base::generate_summaries()
|
||||
// Warn if timed out:
|
||||
if (m_max_time_exceeded)
|
||||
{
|
||||
const auto timeout = m_timeout_timer.get_duration();
|
||||
const auto timeout = m_walltime_timer.get_duration();
|
||||
|
||||
if (m_total_samples < m_min_samples)
|
||||
{
|
||||
@@ -140,9 +148,11 @@ void measure_hot_base::generate_summaries()
|
||||
|
||||
// Log to stdout:
|
||||
printer.log(nvbench::log_level::pass,
|
||||
fmt::format("Batch: {:0.6f}ms GPU, {:0.2f}s total GPU, {}x",
|
||||
fmt::format("Batch: {:0.6f}ms GPU, {:0.2f}s total GPU, "
|
||||
"{:0.2f}s total wall, {}x",
|
||||
avg_cuda_time * 1e3,
|
||||
m_total_cuda_time,
|
||||
m_walltime_timer.get_duration(),
|
||||
m_total_samples));
|
||||
}
|
||||
}
|
||||
@@ -163,8 +173,7 @@ void measure_hot_base::check_skip_time(nvbench::float64_t warmup_time)
|
||||
|
||||
void measure_hot_base::block_stream()
|
||||
{
|
||||
m_blocker.block(m_launch.get_stream(),
|
||||
m_state.get_blocking_kernel_timeout());
|
||||
m_blocker.block(m_launch.get_stream(), m_state.get_blocking_kernel_timeout());
|
||||
}
|
||||
|
||||
} // namespace nvbench::detail
|
||||
|
||||
@@ -68,7 +68,7 @@ protected:
|
||||
|
||||
nvbench::launch m_launch;
|
||||
nvbench::cuda_timer m_cuda_timer;
|
||||
nvbench::cpu_timer m_timeout_timer;
|
||||
nvbench::cpu_timer m_walltime_timer;
|
||||
nvbench::blocking_kernel m_blocker;
|
||||
|
||||
nvbench::int64_t m_min_samples{};
|
||||
@@ -125,7 +125,7 @@ private:
|
||||
|
||||
void run_trials()
|
||||
{
|
||||
m_timeout_timer.start();
|
||||
m_walltime_timer.start();
|
||||
|
||||
// Use warmup results to estimate the number of iterations to run.
|
||||
// The .95 factor here pads the batch_size a bit to avoid needing a second
|
||||
@@ -183,21 +183,22 @@ private:
|
||||
(m_min_time - m_total_cuda_time) /
|
||||
(m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples)));
|
||||
|
||||
m_timeout_timer.stop();
|
||||
const auto total_time = m_timeout_timer.get_duration();
|
||||
|
||||
if (m_total_cuda_time > m_min_time && // min time okay
|
||||
m_total_samples > m_min_samples) // min samples okay
|
||||
{
|
||||
break; // Stop iterating
|
||||
}
|
||||
|
||||
if (total_time > m_timeout)
|
||||
|
||||
m_walltime_timer.stop();
|
||||
if (m_walltime_timer.get_duration() > m_timeout)
|
||||
{
|
||||
m_max_time_exceeded = true;
|
||||
break;
|
||||
}
|
||||
} while (true);
|
||||
|
||||
m_walltime_timer.stop();
|
||||
}
|
||||
|
||||
__forceinline__ void launch_kernel() { m_kernel_launcher(m_launch); }
|
||||
|
||||
@@ -20,42 +20,75 @@
|
||||
|
||||
#include <nvbench/axes_metadata.cuh>
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/config.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/device_manager.cuh>
|
||||
#include <nvbench/git_revision.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
#include <nvbench/version.cuh>
|
||||
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <iterator>
|
||||
#include <ostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <experimental/filesystem>
|
||||
#else
|
||||
#include <filesystem>
|
||||
#endif
|
||||
|
||||
#if NVBENCH_CPP_DIALECT >= 2020
|
||||
#include <bit>
|
||||
#endif
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
bool is_little_endian()
|
||||
{
|
||||
#if NVBENCH_CPP_DIALECT >= 2020
|
||||
return std::endian::native == std::endian::little;
|
||||
#else
|
||||
const nvbench::uint32_t word = {0xBadDecaf};
|
||||
nvbench::uint8_t bytes[4];
|
||||
std::memcpy(bytes, &word, 4);
|
||||
return bytes[0] == 0xaf;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename JsonNode>
|
||||
void write_named_values(JsonNode &node, const nvbench::named_values &values)
|
||||
{
|
||||
const auto value_names = values.get_names();
|
||||
for (const auto &value_name : value_names)
|
||||
{
|
||||
auto &value = node[value_name];
|
||||
auto &value = node.emplace_back();
|
||||
value["name"] = value_name;
|
||||
|
||||
const auto type = values.get_type(value_name);
|
||||
switch (type)
|
||||
{
|
||||
case nvbench::named_values::type::int64:
|
||||
value["type"] = "int64";
|
||||
value["type"] = "int64";
|
||||
// Write as a string; JSON encodes all numbers as double-precision
|
||||
// floats, which would truncate int64s.
|
||||
value["value"] = fmt::to_string(values.get_int64(value_name));
|
||||
break;
|
||||
|
||||
case nvbench::named_values::type::float64:
|
||||
value["type"] = "float64";
|
||||
value["type"] = "float64";
|
||||
// Write as a string for consistency with int64.
|
||||
value["value"] = fmt::to_string(values.get_float64(value_name));
|
||||
break;
|
||||
@@ -64,6 +97,9 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values)
|
||||
value["type"] = "string";
|
||||
value["value"] = values.get_string(value_name);
|
||||
break;
|
||||
|
||||
default:
|
||||
NVBENCH_THROW(std::runtime_error, "{}", "Unrecognized value type.");
|
||||
} // end switch (value type)
|
||||
} // end foreach value name
|
||||
}
|
||||
@@ -73,15 +109,187 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values)
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
json_printer::version_t json_printer::get_json_file_version()
|
||||
{
|
||||
// This version number should stay in sync with `file_version` in
|
||||
// scripts/nvbench_json/version.py.
|
||||
//
|
||||
// Use semantic versioning:
|
||||
// Major version: backwards incompatible changes
|
||||
// Minor version: backwards compatible additions
|
||||
// Patch version: backwards compatible bugfixes/patches
|
||||
return {1, 0, 0};
|
||||
}
|
||||
|
||||
std::string json_printer::version_t::get_string() const
|
||||
{
|
||||
return fmt::format("{}.{}.{}", this->major, this->minor, this->patch);
|
||||
}
|
||||
|
||||
void json_printer::do_process_bulk_data_float64(
|
||||
state &state,
|
||||
const std::string &tag,
|
||||
const std::string &hint,
|
||||
const std::vector<nvbench::float64_t> &data)
|
||||
{
|
||||
printer_base::do_process_bulk_data_float64(state, tag, hint, data);
|
||||
|
||||
if (!m_enable_binary_output)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (hint == "sample_times")
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
namespace fs = std::experimental::filesystem;
|
||||
#else
|
||||
namespace fs = std::filesystem;
|
||||
#endif
|
||||
|
||||
nvbench::cpu_timer timer;
|
||||
timer.start();
|
||||
|
||||
fs::path result_path{m_stream_name + "-bin/"};
|
||||
try
|
||||
{
|
||||
if (!fs::exists(result_path))
|
||||
{
|
||||
if (!fs::create_directory(result_path))
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error,
|
||||
"{}",
|
||||
"Failed to create result directory '{}'.");
|
||||
}
|
||||
}
|
||||
else if (!fs::is_directory(result_path))
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error,
|
||||
"{}",
|
||||
"'{}' exists and is not a directory.");
|
||||
}
|
||||
|
||||
const auto file_id = m_num_jsonbin_files++;
|
||||
result_path /= fmt::format("{:d}.bin", file_id);
|
||||
|
||||
std::ofstream out;
|
||||
out.exceptions(out.exceptions() | std::ios::failbit | std::ios::badbit);
|
||||
out.open(result_path, std::ios::binary | std::ios::out);
|
||||
|
||||
// FIXME: SLOW -- Writing the binary file, 4 bytes at a time...
|
||||
// There are a lot of optimizations that could be done here if this ends
|
||||
// up being a noticeable bottleneck.
|
||||
for (auto value64 : data)
|
||||
{
|
||||
const auto value32 = static_cast<nvbench::float32_t>(value64);
|
||||
char buffer[4];
|
||||
std::memcpy(buffer, &value32, 4);
|
||||
// the c++17 implementation of is_little_endian isn't constexpr, but
|
||||
// all supported compilers optimize this branch as if it were.
|
||||
if (!is_little_endian())
|
||||
{
|
||||
using std::swap;
|
||||
swap(buffer[0], buffer[3]);
|
||||
swap(buffer[1], buffer[2]);
|
||||
}
|
||||
out.write(buffer, 4);
|
||||
}
|
||||
}
|
||||
catch (std::exception &e)
|
||||
{
|
||||
if (auto printer_opt_ref = state.get_benchmark().get_printer();
|
||||
printer_opt_ref.has_value())
|
||||
{
|
||||
auto &printer = printer_opt_ref.value().get();
|
||||
printer.log(nvbench::log_level::warn,
|
||||
fmt::format("Error writing {} ({}) to {}: {}",
|
||||
tag,
|
||||
hint,
|
||||
result_path.string(),
|
||||
e.what()));
|
||||
}
|
||||
} // end catch
|
||||
|
||||
auto &summ = state.add_summary(fmt::format("nv/json/bin:{}", tag));
|
||||
summ.set_string("name", "Samples Times File");
|
||||
summ.set_string("hint", "file/sample_times");
|
||||
summ.set_string("description",
|
||||
"Binary file containing sample times as little-endian "
|
||||
"float32.");
|
||||
summ.set_string("filename", result_path.string());
|
||||
summ.set_int64("size", static_cast<nvbench::int64_t>(data.size()));
|
||||
summ.set_string("hide", "Not needed in table.");
|
||||
|
||||
timer.stop();
|
||||
if (auto printer_opt_ref = state.get_benchmark().get_printer();
|
||||
printer_opt_ref.has_value())
|
||||
{
|
||||
auto &printer = printer_opt_ref.value().get();
|
||||
printer.log(nvbench::log_level::info,
|
||||
fmt::format("Wrote '{}' in {:>6.3f}ms",
|
||||
result_path.string(),
|
||||
timer.get_duration() * 1000));
|
||||
}
|
||||
} // end hint == sample_times
|
||||
}
|
||||
|
||||
void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
{
|
||||
nlohmann::ordered_json root;
|
||||
|
||||
{
|
||||
auto &metadata = root["meta"];
|
||||
|
||||
{
|
||||
auto &argv = metadata["argv"];
|
||||
for (const auto &arg : m_argv)
|
||||
{
|
||||
argv.push_back(arg);
|
||||
}
|
||||
} // "argv"
|
||||
|
||||
{
|
||||
auto &version = metadata["version"];
|
||||
|
||||
{
|
||||
const auto version_info = json_printer::get_json_file_version();
|
||||
auto &json_version = version["json"];
|
||||
|
||||
json_version["major"] = version_info.major;
|
||||
json_version["minor"] = version_info.minor;
|
||||
json_version["patch"] = version_info.patch;
|
||||
json_version["string"] = version_info.get_string();
|
||||
} // "json"
|
||||
|
||||
{
|
||||
auto &nvb_version = version["nvbench"];
|
||||
|
||||
nvb_version["major"] = NVBENCH_VERSION_MAJOR;
|
||||
nvb_version["minor"] = NVBENCH_VERSION_MINOR;
|
||||
nvb_version["patch"] = NVBENCH_VERSION_PATCH;
|
||||
nvb_version["string"] = fmt::format("{}.{}.{}",
|
||||
NVBENCH_VERSION_MAJOR,
|
||||
NVBENCH_VERSION_MINOR,
|
||||
NVBENCH_VERSION_PATCH);
|
||||
|
||||
nvb_version["git_branch"] = NVBENCH_GIT_BRANCH;
|
||||
nvb_version["git_sha"] = NVBENCH_GIT_SHA1;
|
||||
nvb_version["git_version"] = NVBENCH_GIT_VERSION;
|
||||
nvb_version["git_is_dirty"] =
|
||||
#ifdef NVBENCH_GIT_IS_DIRTY
|
||||
true;
|
||||
#else
|
||||
false;
|
||||
#endif
|
||||
} // "nvbench"
|
||||
} // "version"
|
||||
} // "meta"
|
||||
|
||||
{
|
||||
auto &devices = root["devices"];
|
||||
for (const auto &dev_info : nvbench::device_manager::get().get_devices())
|
||||
{
|
||||
auto &device = devices[devices.size()];
|
||||
auto &device = devices.emplace_back();
|
||||
device["id"] = dev_info.get_id();
|
||||
device["name"] = dev_info.get_name();
|
||||
device["sm_version"] = dev_info.get_sm_version();
|
||||
@@ -106,17 +314,17 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
dev_info.get_shared_memory_per_block();
|
||||
device["ecc_state"] = dev_info.get_ecc_state();
|
||||
}
|
||||
}
|
||||
} // "devices"
|
||||
|
||||
{
|
||||
auto &benchmarks = root["benchmarks"];
|
||||
for (const auto &bench_ptr : benches)
|
||||
{
|
||||
const auto bench_index = benchmarks.size();
|
||||
auto &bench = benchmarks[bench_index];
|
||||
auto &bench = benchmarks.emplace_back();
|
||||
|
||||
bench["index"] = bench_index;
|
||||
bench["name"] = bench_ptr->get_name();
|
||||
bench["index"] = bench_index;
|
||||
|
||||
bench["min_samples"] = bench_ptr->get_min_samples();
|
||||
bench["min_time"] = bench_ptr->get_min_time();
|
||||
@@ -133,8 +341,9 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
auto &axes = bench["axes"];
|
||||
for (const auto &axis_ptr : bench_ptr->get_axes().get_axes())
|
||||
{
|
||||
auto &axis = axes[axis_ptr->get_name()];
|
||||
auto &axis = axes.emplace_back();
|
||||
|
||||
axis["name"] = axis_ptr->get_name();
|
||||
axis["type"] = axis_ptr->get_type_as_string();
|
||||
axis["flags"] = axis_ptr->get_flags_as_string();
|
||||
|
||||
@@ -142,8 +351,7 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
const auto axis_size = axis_ptr->get_size();
|
||||
for (std::size_t i = 0; i < axis_size; ++i)
|
||||
{
|
||||
const auto value_idx = values.size();
|
||||
auto &value = values[value_idx];
|
||||
auto &value = values.emplace_back();
|
||||
value["input_string"] = axis_ptr->get_input_string(i);
|
||||
value["description"] = axis_ptr->get_description(i);
|
||||
|
||||
@@ -177,13 +385,9 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
auto &states = bench["states"];
|
||||
for (const auto &exec_state : bench_ptr->get_states())
|
||||
{
|
||||
auto &st = states[exec_state.get_axis_values_as_string()];
|
||||
auto &st = states.emplace_back();
|
||||
|
||||
// TODO: Determine if these need to be part of the state key as well
|
||||
// for uniqueness. The device already is, but the type config index is
|
||||
// not.
|
||||
st["device"] = exec_state.get_device()->get_id();
|
||||
st["type_config_index"] = exec_state.get_type_config_index();
|
||||
st["name"] = exec_state.get_axis_values_as_string();
|
||||
|
||||
st["min_samples"] = exec_state.get_min_samples();
|
||||
st["min_time"] = exec_state.get_min_time();
|
||||
@@ -191,13 +395,50 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
st["skip_time"] = exec_state.get_skip_time();
|
||||
st["timeout"] = exec_state.get_timeout();
|
||||
|
||||
st["device"] = exec_state.get_device()->get_id();
|
||||
st["type_config_index"] = exec_state.get_type_config_index();
|
||||
|
||||
// TODO I'd like to replace this with:
|
||||
// [ {"name" : <axis name>, "index": <value_index>}, ...]
|
||||
// but it would take some refactoring in the data structures to get
|
||||
// that information through.
|
||||
::write_named_values(st["axis_values"], exec_state.get_axis_values());
|
||||
|
||||
auto &summaries = st["summaries"];
|
||||
for (const auto &exec_summ : exec_state.get_summaries())
|
||||
{
|
||||
auto &summ = summaries[exec_summ.get_name()];
|
||||
::write_named_values(summ, exec_summ);
|
||||
auto &summ = summaries.emplace_back();
|
||||
summ["tag"] = exec_summ.get_tag();
|
||||
|
||||
// Write out the expected values as simple key/value pairs
|
||||
nvbench::named_values summary_values = exec_summ;
|
||||
if (summary_values.has_value("name"))
|
||||
{
|
||||
summ["name"] = summary_values.get_string("name");
|
||||
summary_values.remove_value("name");
|
||||
}
|
||||
if (summary_values.has_value("description"))
|
||||
{
|
||||
summ["description"] = summary_values.get_string("description");
|
||||
summary_values.remove_value("description");
|
||||
}
|
||||
if (summary_values.has_value("hint"))
|
||||
{
|
||||
summ["hint"] = summary_values.get_string("hint");
|
||||
summary_values.remove_value("hint");
|
||||
}
|
||||
if (summary_values.has_value("hide"))
|
||||
{
|
||||
summ["hide"] = summary_values.get_string("hide");
|
||||
summary_values.remove_value("hide");
|
||||
}
|
||||
|
||||
// Write any additional values generically in
|
||||
// ["data"] = [{name,type,value}, ...]:
|
||||
if (summary_values.get_size() != 0)
|
||||
{
|
||||
::write_named_values(summ["data"], summary_values);
|
||||
}
|
||||
}
|
||||
|
||||
st["is_skipped"] = exec_state.is_skipped();
|
||||
@@ -208,7 +449,7 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
}
|
||||
} // end foreach exec_state
|
||||
} // end foreach benchmark
|
||||
}
|
||||
} // "benchmarks"
|
||||
|
||||
m_ostream << root.dump(2) << "\n";
|
||||
}
|
||||
|
||||
@@ -20,19 +20,68 @@
|
||||
|
||||
#include <nvbench/printer_base.cuh>
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
/*!
|
||||
* JSON output format.
|
||||
*
|
||||
* All modifications to the output file should increment the semantic version
|
||||
* of the json files appropriately (see json_printer::get_json_file_version()).
|
||||
*/
|
||||
struct json_printer : nvbench::printer_base
|
||||
{
|
||||
using printer_base::printer_base;
|
||||
|
||||
json_printer(std::ostream &stream,
|
||||
std::string stream_name,
|
||||
bool enable_binary_output)
|
||||
: printer_base(stream, std::move(stream_name))
|
||||
, m_enable_binary_output{enable_binary_output}
|
||||
{}
|
||||
|
||||
/**
|
||||
* The json schema version. Follows semantic versioning.
|
||||
*/
|
||||
struct version_t
|
||||
{
|
||||
nvbench::uint16_t major;
|
||||
nvbench::uint16_t minor;
|
||||
nvbench::uint16_t patch;
|
||||
|
||||
[[nodiscard]] std::string get_string() const;
|
||||
};
|
||||
|
||||
[[nodiscard]] static version_t get_json_file_version();
|
||||
|
||||
[[nodiscard]] bool get_enable_binary_output() const
|
||||
{
|
||||
return m_enable_binary_output;
|
||||
}
|
||||
void set_enable_binary_output(bool b) { m_enable_binary_output = b; }
|
||||
|
||||
protected:
|
||||
// Virtual API from printer_base:
|
||||
void do_log_argv(const std::vector<std::string>& argv) override
|
||||
{
|
||||
m_argv = argv;
|
||||
}
|
||||
void do_process_bulk_data_float64(
|
||||
nvbench::state &state,
|
||||
const std::string &tag,
|
||||
const std::string &hint,
|
||||
const std::vector<nvbench::float64_t> &data) override;
|
||||
void do_print_benchmark_results(const benchmark_vector &benches) override;
|
||||
|
||||
bool m_enable_binary_output{false};
|
||||
std::size_t m_num_jsonbin_files{};
|
||||
|
||||
std::vector<std::string> m_argv;
|
||||
};
|
||||
|
||||
} // namespace nvbench
|
||||
|
||||
@@ -307,43 +307,43 @@ void markdown_printer::do_print_benchmark_results(
|
||||
{
|
||||
continue;
|
||||
}
|
||||
const std::string &key = summ.get_name();
|
||||
const std::string &header = summ.has_value("short_name")
|
||||
? summ.get_string("short_name")
|
||||
: key;
|
||||
const std::string &tag = summ.get_tag();
|
||||
const std::string &header = summ.has_value("name")
|
||||
? summ.get_string("name")
|
||||
: tag;
|
||||
|
||||
std::string hint = summ.has_value("hint") ? summ.get_string("hint")
|
||||
: std::string{};
|
||||
if (hint == "duration")
|
||||
{
|
||||
table.add_cell(row, key, header, this->do_format_duration(summ));
|
||||
table.add_cell(row, tag, header, this->do_format_duration(summ));
|
||||
}
|
||||
else if (hint == "item_rate")
|
||||
{
|
||||
table.add_cell(row, key, header, this->do_format_item_rate(summ));
|
||||
table.add_cell(row, tag, header, this->do_format_item_rate(summ));
|
||||
}
|
||||
else if (hint == "bytes")
|
||||
{
|
||||
table.add_cell(row, key, header, this->do_format_bytes(summ));
|
||||
table.add_cell(row, tag, header, this->do_format_bytes(summ));
|
||||
}
|
||||
else if (hint == "byte_rate")
|
||||
{
|
||||
table.add_cell(row, key, header, this->do_format_byte_rate(summ));
|
||||
table.add_cell(row, tag, header, this->do_format_byte_rate(summ));
|
||||
}
|
||||
else if (hint == "sample_size")
|
||||
{
|
||||
table.add_cell(row,
|
||||
key,
|
||||
tag,
|
||||
header,
|
||||
this->do_format_sample_size(summ));
|
||||
}
|
||||
else if (hint == "percentage")
|
||||
{
|
||||
table.add_cell(row, key, header, this->do_format_percentage(summ));
|
||||
table.add_cell(row, tag, header, this->do_format_percentage(summ));
|
||||
}
|
||||
else
|
||||
{
|
||||
table.add_cell(row, key, header, this->do_format_default(summ));
|
||||
table.add_cell(row, tag, header, this->do_format_default(summ));
|
||||
}
|
||||
}
|
||||
row++;
|
||||
|
||||
@@ -375,6 +375,8 @@ void option_parser::parse_impl()
|
||||
}
|
||||
|
||||
this->update_used_device_state();
|
||||
|
||||
m_printer.log_argv(m_args);
|
||||
}
|
||||
|
||||
void option_parser::parse_range(option_parser::arg_iterator_t first,
|
||||
@@ -468,7 +470,13 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
|
||||
else if (arg == "--json")
|
||||
{
|
||||
check_params(1);
|
||||
this->add_json_printer(first[1]);
|
||||
this->add_json_printer(first[1], false);
|
||||
first += 2;
|
||||
}
|
||||
else if (arg == "--jsonbin")
|
||||
{
|
||||
check_params(1);
|
||||
this->add_json_printer(first[1], true);
|
||||
first += 2;
|
||||
}
|
||||
else if (arg == "--benchmark" || arg == "-b")
|
||||
@@ -515,7 +523,7 @@ void option_parser::add_markdown_printer(const std::string &spec)
|
||||
try
|
||||
{
|
||||
std::ostream &stream = this->printer_spec_to_ostream(spec);
|
||||
auto &printer = m_printer.emplace<nvbench::markdown_printer>(stream);
|
||||
auto &printer = m_printer.emplace<nvbench::markdown_printer>(stream, spec);
|
||||
if (spec == "stdout")
|
||||
{
|
||||
printer.set_color(m_color_md_stdout_printer);
|
||||
@@ -533,7 +541,7 @@ void option_parser::add_csv_printer(const std::string &spec)
|
||||
try
|
||||
{
|
||||
std::ostream &stream = this->printer_spec_to_ostream(spec);
|
||||
m_printer.emplace<nvbench::csv_printer>(stream);
|
||||
m_printer.emplace<nvbench::csv_printer>(stream, spec);
|
||||
}
|
||||
catch (std::exception &e)
|
||||
{
|
||||
@@ -543,16 +551,18 @@ catch (std::exception &e)
|
||||
e.what());
|
||||
}
|
||||
|
||||
void option_parser::add_json_printer(const std::string &spec)
|
||||
void option_parser::add_json_printer(const std::string &spec,
|
||||
bool enable_binary)
|
||||
try
|
||||
{
|
||||
std::ostream &stream = this->printer_spec_to_ostream(spec);
|
||||
m_printer.emplace<nvbench::json_printer>(stream);
|
||||
m_printer.emplace<nvbench::json_printer>(stream, spec, enable_binary);
|
||||
}
|
||||
catch (std::exception &e)
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error,
|
||||
"Error while adding json output for `{}`:\n{}",
|
||||
"Error while adding {} output for `{}`:\n{}",
|
||||
enable_binary ? "jsonbin" : "json",
|
||||
spec,
|
||||
e.what());
|
||||
}
|
||||
|
||||
@@ -81,7 +81,7 @@ private:
|
||||
|
||||
void add_markdown_printer(const std::string &spec);
|
||||
void add_csv_printer(const std::string &spec);
|
||||
void add_json_printer(const std::string &spec);
|
||||
void add_json_printer(const std::string &spec, bool enable_binary);
|
||||
|
||||
std::ostream &printer_spec_to_ostream(const std::string &spec);
|
||||
|
||||
@@ -121,7 +121,7 @@ private:
|
||||
|
||||
void update_used_device_state() const;
|
||||
|
||||
// less gross argv:
|
||||
// Command line args
|
||||
std::vector<std::string> m_args;
|
||||
|
||||
// Store benchmark modifiers passed in before any benchmarks are requested as
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <iosfwd>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
@@ -55,7 +57,22 @@ struct printer_base
|
||||
/*!
|
||||
* Construct a new printer_base that will write to ostream.
|
||||
*/
|
||||
explicit printer_base(std::ostream &ostream);
|
||||
explicit printer_base(std::ostream &ostream)
|
||||
: printer_base(ostream, {})
|
||||
{}
|
||||
|
||||
/*!
|
||||
* Construct a new print_base that will write to an ostream, described by
|
||||
* stream_name.
|
||||
*
|
||||
* `stream_name` is used to open any additional files needed by the printer.
|
||||
* If `ostream` is a file stream, use the filename. Stream name may be
|
||||
* "stdout" / "stderr" or empty.
|
||||
* @param ostream
|
||||
* @param stream_name
|
||||
*/
|
||||
explicit printer_base(std::ostream &ostream, std::string stream_name);
|
||||
|
||||
virtual ~printer_base();
|
||||
|
||||
// move-only
|
||||
@@ -64,6 +81,15 @@ struct printer_base
|
||||
printer_base &operator=(const printer_base &) = delete;
|
||||
printer_base &operator=(printer_base &&) = default;
|
||||
|
||||
/*!
|
||||
* Called once with the command line arguments used to invoke the current
|
||||
* executable.
|
||||
*/
|
||||
void log_argv(const std::vector<std::string> &argv)
|
||||
{
|
||||
this->do_log_argv(argv);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Print a summary of all detected devices, if supported.
|
||||
*
|
||||
@@ -96,6 +122,31 @@ struct printer_base
|
||||
this->do_log_run_state(exec_state);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Measurements may call this to allow a printer to perform extra processing
|
||||
* on large sets of data.
|
||||
*
|
||||
* @param state The `nvbench::state` associated with this measurement.
|
||||
*
|
||||
* @param tag A tag identifying the data. Tags must be unique within a state,
|
||||
* but the same tag may be reused in multiple states. Data produced
|
||||
* by NVBench will be prefixed with "nv/", for example, isolated
|
||||
* sample time measurements are tagged "nv/cold/sample_times".
|
||||
*
|
||||
* @param hint A hint describing the type of data. Subclasses may use these
|
||||
* to determine how to handle the data, and should ignore any
|
||||
* hints they don't understand. Common hints are:
|
||||
* - "sample_times": `data` contains all sample times for a
|
||||
* measurement (in seconds).
|
||||
*/
|
||||
void process_bulk_data(nvbench::state &state,
|
||||
const std::string &tag,
|
||||
const std::string &hint,
|
||||
const std::vector<nvbench::float64_t> &data)
|
||||
{
|
||||
this->do_process_bulk_data_float64(state, tag, hint, data);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Print details of the unexecuted benchmarks in `benches`. This is used for
|
||||
* `--list`.
|
||||
@@ -142,11 +193,17 @@ struct printer_base
|
||||
|
||||
protected:
|
||||
// Implementation hooks for subclasses:
|
||||
virtual void do_log_argv(const std::vector<std::string>&) {}
|
||||
virtual void do_print_device_info() {}
|
||||
virtual void do_print_log_preamble() {}
|
||||
virtual void do_print_log_epilogue() {}
|
||||
virtual void do_log(nvbench::log_level, const std::string &) {}
|
||||
virtual void do_log_run_state(const nvbench::state &) {}
|
||||
virtual void
|
||||
do_process_bulk_data_float64(nvbench::state &,
|
||||
const std::string &,
|
||||
const std::string &,
|
||||
const std::vector<nvbench::float64_t> &){};
|
||||
virtual void do_print_benchmark_list(const benchmark_vector &) {}
|
||||
virtual void do_print_benchmark_results(const benchmark_vector &) {}
|
||||
|
||||
@@ -159,6 +216,10 @@ protected:
|
||||
|
||||
std::ostream &m_ostream;
|
||||
|
||||
// May be empty, a filename, or "stdout" / "stderr" depending on the type of
|
||||
// stream in m_stream.
|
||||
std::string m_stream_name;
|
||||
|
||||
std::size_t m_completed_state_count{};
|
||||
std::size_t m_total_state_count{};
|
||||
};
|
||||
|
||||
@@ -23,8 +23,9 @@
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
printer_base::printer_base(std::ostream &ostream)
|
||||
printer_base::printer_base(std::ostream &ostream, std::string stream_name)
|
||||
: m_ostream{ostream}
|
||||
, m_stream_name{std::move(stream_name)}
|
||||
{}
|
||||
|
||||
// Defined here to keep <ostream> out of the header
|
||||
|
||||
@@ -46,11 +46,17 @@ struct printer_multiplex : nvbench::printer_base
|
||||
}
|
||||
|
||||
protected:
|
||||
void do_log_argv(const std::vector<std::string> &argv) override;
|
||||
void do_print_device_info() override;
|
||||
void do_print_log_preamble() override;
|
||||
void do_print_log_epilogue() override;
|
||||
void do_log(nvbench::log_level, const std::string &) override;
|
||||
void do_log_run_state(const nvbench::state &) override;
|
||||
void do_process_bulk_data_float64(
|
||||
nvbench::state &,
|
||||
const std::string &,
|
||||
const std::string &,
|
||||
const std::vector<nvbench::float64_t> &) override;
|
||||
void do_print_benchmark_list(const benchmark_vector &benches) override;
|
||||
void do_print_benchmark_results(const benchmark_vector &benches) override;
|
||||
void do_set_completed_state_count(std::size_t states) override;
|
||||
|
||||
@@ -67,6 +67,18 @@ void printer_multiplex::do_log_run_state(const nvbench::state &exec_state)
|
||||
}
|
||||
}
|
||||
|
||||
void printer_multiplex::do_process_bulk_data_float64(
|
||||
state &state,
|
||||
const std::string &tag,
|
||||
const std::string &hint,
|
||||
const std::vector<nvbench::float64_t> &data)
|
||||
{
|
||||
for (auto &format_ptr : m_printers)
|
||||
{
|
||||
format_ptr->process_bulk_data(state, tag, hint, data);
|
||||
}
|
||||
}
|
||||
|
||||
void printer_multiplex::do_print_benchmark_list(const benchmark_vector &benches)
|
||||
{
|
||||
for (auto &format_ptr : m_printers)
|
||||
@@ -109,5 +121,13 @@ void printer_multiplex::do_set_total_state_count(std::size_t states)
|
||||
format_ptr->set_total_state_count(states);
|
||||
}
|
||||
}
|
||||
void printer_multiplex::do_log_argv(const std::vector<std::string> &argv)
|
||||
{
|
||||
printer_base::do_log_argv(argv);
|
||||
for (auto &format_ptr : m_printers)
|
||||
{
|
||||
format_ptr->log_argv(argv);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace nvbench
|
||||
|
||||
@@ -119,7 +119,7 @@ struct state
|
||||
std::string column_name = {});
|
||||
|
||||
void add_buffer_size(std::size_t num_bytes,
|
||||
std::string summary_name,
|
||||
std::string summary_tag,
|
||||
std::string column_name = {},
|
||||
std::string description = {});
|
||||
|
||||
@@ -266,10 +266,10 @@ struct state
|
||||
|| is_dram_throughput_collected();
|
||||
}
|
||||
|
||||
summary &add_summary(std::string summary_name);
|
||||
summary &add_summary(std::string summary_tag);
|
||||
summary &add_summary(summary s);
|
||||
[[nodiscard]] const summary &get_summary(std::string_view name) const;
|
||||
[[nodiscard]] summary &get_summary(std::string_view name);
|
||||
[[nodiscard]] const summary &get_summary(std::string_view tag) const;
|
||||
[[nodiscard]] summary &get_summary(std::string_view tag);
|
||||
[[nodiscard]] const std::vector<summary> &get_summaries() const;
|
||||
[[nodiscard]] std::vector<summary> &get_summaries();
|
||||
|
||||
|
||||
@@ -109,9 +109,9 @@ catch (...)
|
||||
return default_value;
|
||||
}
|
||||
|
||||
summary &state::add_summary(std::string summary_name)
|
||||
summary &state::add_summary(std::string summary_tag)
|
||||
{
|
||||
return m_summaries.emplace_back(std::move(summary_name));
|
||||
return m_summaries.emplace_back(std::move(summary_tag));
|
||||
}
|
||||
|
||||
summary &state::add_summary(summary s)
|
||||
@@ -120,29 +120,54 @@ summary &state::add_summary(summary s)
|
||||
return m_summaries.back();
|
||||
}
|
||||
|
||||
const summary &state::get_summary(std::string_view name) const
|
||||
const summary &state::get_summary(std::string_view tag) const
|
||||
{
|
||||
// Check tags first
|
||||
auto iter =
|
||||
std::find_if(m_summaries.cbegin(),
|
||||
m_summaries.cend(),
|
||||
[&name](const auto &s) { return s.get_name() == name; });
|
||||
if (iter == m_summaries.cend())
|
||||
[&tag](const auto &s) { return s.get_tag() == tag; });
|
||||
if (iter != m_summaries.cend())
|
||||
{
|
||||
NVBENCH_THROW(std::invalid_argument, "No summary named '{}'.", name);
|
||||
return *iter;
|
||||
}
|
||||
return *iter;
|
||||
|
||||
// Then names:
|
||||
iter =
|
||||
std::find_if(m_summaries.cbegin(),
|
||||
m_summaries.cend(),
|
||||
[&tag](const auto &s) { return s.get_string("name") == tag; });
|
||||
if (iter != m_summaries.cend())
|
||||
{
|
||||
return *iter;
|
||||
}
|
||||
|
||||
NVBENCH_THROW(std::invalid_argument, "No summary tagged '{}'.", tag);
|
||||
}
|
||||
|
||||
summary &state::get_summary(std::string_view name)
|
||||
summary &state::get_summary(std::string_view tag)
|
||||
{
|
||||
auto iter = std::find_if(m_summaries.begin(),
|
||||
m_summaries.end(),
|
||||
[&name](auto &s) { return s.get_name() == name; });
|
||||
if (iter == m_summaries.end())
|
||||
// Check tags first
|
||||
auto iter =
|
||||
std::find_if(m_summaries.begin(), m_summaries.end(), [&tag](const auto &s) {
|
||||
return s.get_tag() == tag;
|
||||
});
|
||||
if (iter != m_summaries.end())
|
||||
{
|
||||
NVBENCH_THROW(std::invalid_argument, "No summary named '{}'.", name);
|
||||
return *iter;
|
||||
}
|
||||
return *iter;
|
||||
|
||||
// Then names:
|
||||
iter =
|
||||
std::find_if(m_summaries.begin(), m_summaries.end(), [&tag](const auto &s) {
|
||||
return s.get_string("name") == tag;
|
||||
});
|
||||
if (iter != m_summaries.end())
|
||||
{
|
||||
return *iter;
|
||||
}
|
||||
|
||||
NVBENCH_THROW(std::invalid_argument, "No summary tagged '{}'.", tag);
|
||||
}
|
||||
|
||||
const std::vector<summary> &state::get_summaries() const { return m_summaries; }
|
||||
@@ -226,8 +251,9 @@ void state::add_element_count(std::size_t elements, std::string column_name)
|
||||
m_element_count += elements;
|
||||
if (!column_name.empty())
|
||||
{
|
||||
auto &summ = this->add_summary("Element count: " + column_name);
|
||||
summ.set_string("short_name", std::move(column_name));
|
||||
auto &summ = this->add_summary("nv/element_count/" + column_name);
|
||||
summ.set_string("description", "Number of elements: " + column_name);
|
||||
summ.set_string("name", std::move(column_name));
|
||||
summ.set_int64("value", static_cast<nvbench::int64_t>(elements));
|
||||
}
|
||||
}
|
||||
@@ -237,9 +263,8 @@ void state::add_global_memory_reads(std::size_t bytes, std::string column_name)
|
||||
m_global_memory_rw_bytes += bytes;
|
||||
if (!column_name.empty())
|
||||
{
|
||||
this->add_buffer_size(bytes,
|
||||
"Input Buffer Size: " + column_name,
|
||||
std::move(column_name));
|
||||
std::string tag = fmt::format("nv/gmem/reads/{}", column_name);
|
||||
this->add_buffer_size(bytes, std::move(tag), std::move(column_name));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -248,29 +273,33 @@ void state::add_global_memory_writes(std::size_t bytes, std::string column_name)
|
||||
m_global_memory_rw_bytes += bytes;
|
||||
if (!column_name.empty())
|
||||
{
|
||||
this->add_buffer_size(bytes,
|
||||
"Output Buffer Size: " + column_name,
|
||||
std::move(column_name));
|
||||
const std::string tag = fmt::format("nv/gmem/writes/{}", column_name);
|
||||
this->add_buffer_size(bytes, std::move(tag), std::move(column_name));
|
||||
}
|
||||
}
|
||||
|
||||
void state::add_buffer_size(std::size_t num_bytes,
|
||||
std::string summary_name,
|
||||
std::string summary_tag,
|
||||
std::string column_name,
|
||||
std::string description)
|
||||
{
|
||||
auto &summ = this->add_summary(std::move(summary_name));
|
||||
auto &summ = this->add_summary(std::move(summary_tag));
|
||||
summ.set_string("hint", "bytes");
|
||||
summ.set_int64("value", static_cast<nvbench::int64_t>(num_bytes));
|
||||
|
||||
if (!column_name.empty())
|
||||
{
|
||||
summ.set_string("short_name", std::move(column_name));
|
||||
summ.set_string("name", std::move(column_name));
|
||||
}
|
||||
else
|
||||
{
|
||||
summ.set_string("name", ("None"));
|
||||
summ.set_string("hide", "No column name provided.");
|
||||
}
|
||||
if (!description.empty())
|
||||
{
|
||||
summ.set_string("description", std::move(description));
|
||||
}
|
||||
summ.set_int64("value", static_cast<nvbench::int64_t>(num_bytes));
|
||||
}
|
||||
|
||||
} // namespace nvbench
|
||||
|
||||
@@ -27,50 +27,68 @@ namespace nvbench
|
||||
{
|
||||
|
||||
/**
|
||||
* A named set of key/value pairs associated with a benchmark result.
|
||||
* @brief A single value associated with a benchmark state.
|
||||
*
|
||||
* The summary name is the unabbreviated name for the measurement.
|
||||
* An abbreviated name for column headings can be suggested in a "short_name"
|
||||
* entry (see below).
|
||||
* Each summary object contains a single value with associated metadata, such
|
||||
* as name, description, type, and formatting hints. Each summary object
|
||||
* corresponds to a cell in an output markdown table, with summaries grouped
|
||||
* into columns by their tag.
|
||||
*
|
||||
* Some keys have standard meanings that output formats may use to produce
|
||||
* more readable representations of the result:
|
||||
* The summary tag provided at construction should be a unique identifier that
|
||||
* will be convenient and unambiguous during lookups. For example, summaries
|
||||
* produced by NVBench will begin with `nv/` and contain a hierarchical
|
||||
* organization of descriptors, such as `nv/cold/time/gpu/mean`.
|
||||
*
|
||||
* - "hint": Formatting hints (see below)
|
||||
* - "short_name": Abbreviated name for table headings.
|
||||
* - "description": Longer description of result.
|
||||
* - "value": Actual value.
|
||||
* The summary may contain an arbitrary number of key/value pairs. The keys
|
||||
* are `std::string` and the values may be `std::string`, `int64_t`, or
|
||||
* `float64_t`. These may be used to store arbitrary user data and will be
|
||||
* written into the json output.
|
||||
*
|
||||
* Some keys are reserved and have special meaning. These may be used by tooling
|
||||
* to help interpret data:
|
||||
*
|
||||
* - `"name": required [string]` Compact, used for table headings.
|
||||
* - `"description": optional [string]` Longer description.
|
||||
* - `"value": required [string|float64|int64]` Actual value.
|
||||
* - `"hint": optional [string]` Formatting hints (see below)
|
||||
* - `"hide": optional [string]` If present, the summary will not be included in
|
||||
* markdown output tables.
|
||||
*
|
||||
* Additionally, keys beginning with `nv/` are reserved for NVBench.
|
||||
*
|
||||
* Hints indicate the type of data stored in "value", but may be omitted.
|
||||
* NVBench uses the following hints:
|
||||
*
|
||||
* Hints:
|
||||
* - unset: Arbitrary value is stored in "value".
|
||||
* - "duration": "value" is a float64_t time duration in seconds.
|
||||
* - "item_rate": "value" is a float64_t item rate in elements / second.
|
||||
* - "bytes": "value" is an int64_t number of bytes.
|
||||
* - "byte_rate": "value" is a float64_t byte rate in bytes / second.
|
||||
* - "sample_size": "value" is an int64_t number of samples in a measurement.
|
||||
* - "percentage": "value" is a float64_t percentage (stored as a ratio, 1. =
|
||||
* 100%).
|
||||
* - "sample_size": "value" is an int64_t samples count.
|
||||
* - "percentage": "value" is a float64_t percentage (100% stored as 1.0).
|
||||
* - "file/sample_times":
|
||||
* - "filename" is the path to a binary file that encodes all sample
|
||||
* times (in seconds) as float32_t values.
|
||||
* - "size" is an int64_t containing the number of float32_t values stored in
|
||||
* the binary file.
|
||||
*
|
||||
* The key/value pair functionality is implemented by the
|
||||
* `nvbench::named_values` base class.
|
||||
*
|
||||
* Example: Adding a new summary to an nvbench::state object:
|
||||
*
|
||||
* ```
|
||||
* auto &summ = state.add_summary("Average GPU Time (Batch)");
|
||||
* auto &summ = state.add_summary("nv/batch/gpu/time/mean");
|
||||
* summ.set_string("name", "Batch GPU");
|
||||
* summ.set_string("hint", "duration");
|
||||
* summ.set_string("short_name", "Batch GPU");
|
||||
* summ.set_string("description",
|
||||
* "Average back-to-back kernel execution time as measured "
|
||||
* "by CUDA events.");
|
||||
* "Average batch execution time measured by CUDA event
|
||||
* timers.");
|
||||
* summ.set_float64("value", avg_batch_gpu_time);
|
||||
* ```
|
||||
*/
|
||||
struct summary : public nvbench::named_values
|
||||
{
|
||||
summary() = default;
|
||||
explicit summary(std::string name)
|
||||
: m_name(std::move(name))
|
||||
explicit summary(std::string tag)
|
||||
: m_tag(std::move(tag))
|
||||
{}
|
||||
|
||||
// move-only
|
||||
@@ -79,11 +97,11 @@ struct summary : public nvbench::named_values
|
||||
summary &operator=(const summary &) = delete;
|
||||
summary &operator=(summary &&) = default;
|
||||
|
||||
void set_name(std::string name) { m_name = std::move(name); }
|
||||
[[nodiscard]] const std::string &get_name() const { return m_name; }
|
||||
void set_tag(std::string tag) { m_tag = std::move(tag); }
|
||||
[[nodiscard]] const std::string &get_tag() const { return m_tag; }
|
||||
|
||||
private:
|
||||
std::string m_name;
|
||||
std::string m_tag;
|
||||
};
|
||||
|
||||
} // namespace nvbench
|
||||
|
||||
@@ -54,7 +54,8 @@ bool type_axis::get_is_active(std::size_t idx) const
|
||||
|
||||
std::size_t type_axis::get_active_count() const
|
||||
{
|
||||
return std::count(m_mask.cbegin(), m_mask.cend(), true);
|
||||
return static_cast<std::size_t>(
|
||||
std::count(m_mask.cbegin(), m_mask.cend(), true));
|
||||
}
|
||||
|
||||
std::size_t type_axis::get_type_index(const std::string &input_string) const
|
||||
|
||||
@@ -1,2 +1,6 @@
|
||||
tabulate
|
||||
colorama
|
||||
matplotlib
|
||||
numpy
|
||||
pandas
|
||||
seaborn
|
||||
tabulate
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import sys
|
||||
@@ -10,10 +9,13 @@ from colorama import Fore
|
||||
|
||||
import tabulate
|
||||
|
||||
from nvbench_json import reader
|
||||
|
||||
# Parse version string into tuple, "x.y.z" -> (x, y, z)
|
||||
def version_tuple(v):
|
||||
return tuple(map(int, (v.split("."))))
|
||||
|
||||
|
||||
tabulate_version = version_tuple(tabulate.__version__)
|
||||
|
||||
all_devices = []
|
||||
@@ -38,8 +40,8 @@ def find_device_by_id(device_id):
|
||||
|
||||
|
||||
def format_int64_axis_value(axis_name, axis_value, axes):
|
||||
axis_def = axes[axis_name]
|
||||
axis_flags = axis_def["flags"]
|
||||
axis = next(filter(lambda ax: ax["name"] == axis_name, axes))
|
||||
axis_flags = axis["flags"]
|
||||
value = int(axis_value["value"])
|
||||
if axis_flags == "pow2":
|
||||
value = math.log2(value)
|
||||
@@ -60,8 +62,8 @@ def format_string_axis_value(axis_name, axis_value, axes):
|
||||
|
||||
|
||||
def format_axis_value(axis_name, axis_value, axes):
|
||||
axis_def = axes[axis_name]
|
||||
axis_type = axis_def["type"]
|
||||
axis = next(filter(lambda ax: ax["name"] == axis_name, axes))
|
||||
axis_type = axis["type"]
|
||||
if axis_type == "int64":
|
||||
return format_int64_axis_value(axis_name, axis_value, axes)
|
||||
elif axis_type == "float64":
|
||||
@@ -92,7 +94,7 @@ def format_percentage(percentage):
|
||||
# When there aren't enough samples for a meaningful noise measurement,
|
||||
# the noise is recorded as infinity. Unfortunately, JSON spec doesn't
|
||||
# allow for inf, so these get turned into null.
|
||||
if not percentage:
|
||||
if percentage is None:
|
||||
return "inf"
|
||||
return "%0.2f%%" % (percentage * 100.0)
|
||||
|
||||
@@ -110,7 +112,9 @@ def compare_benches(ref_benches, cmp_benches, threshold):
|
||||
ref_states = ref_bench["states"]
|
||||
cmp_states = cmp_bench["states"]
|
||||
|
||||
headers = list(axes.keys()) if axes else []
|
||||
axes = axes if axes else []
|
||||
|
||||
headers = [x["name"] for x in axes]
|
||||
colalign = ["center"] * len(headers)
|
||||
|
||||
headers.append("Ref Time")
|
||||
@@ -131,9 +135,11 @@ def compare_benches(ref_benches, cmp_benches, threshold):
|
||||
for device_id in device_ids:
|
||||
|
||||
rows = []
|
||||
for cmp_state_name in cmp_states:
|
||||
cmp_state = cmp_states[cmp_state_name]
|
||||
ref_state = ref_states[cmp_state_name]
|
||||
for cmp_state in cmp_states:
|
||||
cmp_state_name = cmp_state["name"]
|
||||
ref_state = next(filter(lambda st: st["name"] == cmp_state_name,
|
||||
ref_states),
|
||||
None)
|
||||
if not ref_state:
|
||||
continue
|
||||
|
||||
@@ -142,8 +148,8 @@ def compare_benches(ref_benches, cmp_benches, threshold):
|
||||
axis_values = []
|
||||
|
||||
row = []
|
||||
for axis_value_name in axis_values:
|
||||
axis_value = axis_values[axis_value_name]
|
||||
for axis_value in axis_values:
|
||||
axis_value_name = axis_value["name"]
|
||||
row.append(format_axis_value(axis_value_name,
|
||||
axis_value,
|
||||
axes))
|
||||
@@ -154,14 +160,13 @@ def compare_benches(ref_benches, cmp_benches, threshold):
|
||||
if not ref_summaries or not cmp_summaries:
|
||||
continue
|
||||
|
||||
cmp_time_summary = cmp_summaries.get("Average GPU Time (Cold)")
|
||||
ref_time_summary = ref_summaries.get("Average GPU Time (Cold)")
|
||||
cmp_noise_summary = cmp_summaries.get(
|
||||
"GPU Relative Standard Deviation (Cold)"
|
||||
)
|
||||
ref_noise_summary = ref_summaries.get(
|
||||
"GPU Relative Standard Deviation (Cold)"
|
||||
)
|
||||
def lookup_summary(summaries, tag):
|
||||
return next(filter(lambda s: s["tag"] == tag, summaries), None)
|
||||
|
||||
cmp_time_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/mean")
|
||||
ref_time_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/mean")
|
||||
cmp_noise_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/stdev/relative")
|
||||
ref_noise_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/stdev/relative")
|
||||
|
||||
# TODO: Use other timings, too. Maybe multiple rows, with a
|
||||
# "Timing" column + values "CPU/GPU/Batch"?
|
||||
@@ -171,10 +176,16 @@ def compare_benches(ref_benches, cmp_benches, threshold):
|
||||
ref_noise_summary]):
|
||||
continue
|
||||
|
||||
cmp_time = cmp_time_summary["value"]["value"]
|
||||
ref_time = ref_time_summary["value"]["value"]
|
||||
cmp_noise = cmp_noise_summary["value"]["value"]
|
||||
ref_noise = ref_noise_summary["value"]["value"]
|
||||
def extract_value(summary):
|
||||
summary_data = summary["data"]
|
||||
value_data = next(filter(lambda v: v["name"] == "value", summary_data))
|
||||
assert(value_data["type"] == "float64")
|
||||
return value_data["value"]
|
||||
|
||||
cmp_time = extract_value(cmp_time_summary)
|
||||
ref_time = extract_value(ref_time_summary)
|
||||
cmp_noise = extract_value(cmp_noise_summary)
|
||||
ref_noise = extract_value(ref_noise_summary)
|
||||
|
||||
# Convert string encoding to expected numerics:
|
||||
cmp_time = float(cmp_time)
|
||||
@@ -223,7 +234,6 @@ def compare_benches(ref_benches, cmp_benches, threshold):
|
||||
|
||||
rows.append(row)
|
||||
|
||||
|
||||
if len(rows) == 0:
|
||||
continue
|
||||
|
||||
@@ -244,13 +254,12 @@ def compare_benches(ref_benches, cmp_benches, threshold):
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
help_text = "%(prog)s [reference.json compare.json | reference_dir/ compare_dir/]"
|
||||
parser = argparse.ArgumentParser(prog='nvbench_compare', usage=help_text)
|
||||
parser.add_argument('--threshold-diff',type=float, dest='threshold', default=0.0,
|
||||
parser.add_argument('--threshold-diff', type=float, dest='threshold', default=0.0,
|
||||
help='only show benchmarks where percentage diff is >= THRESHOLD')
|
||||
|
||||
args,files_or_dirs = parser.parse_known_args()
|
||||
args, files_or_dirs = parser.parse_known_args()
|
||||
print(files_or_dirs)
|
||||
|
||||
if len(files_or_dirs) != 2:
|
||||
@@ -270,14 +279,12 @@ def main():
|
||||
os.path.getsize(r) > 0 and os.path.getsize(c) > 0:
|
||||
to_compare.append((r, c))
|
||||
else:
|
||||
to_compare = [(files_or_dirs[0],files_or_dirs[1])]
|
||||
to_compare = [(files_or_dirs[0], files_or_dirs[1])]
|
||||
|
||||
for ref,comp in to_compare:
|
||||
for ref, comp in to_compare:
|
||||
|
||||
with open(ref, "r") as ref_file:
|
||||
ref_root = json.load(ref_file)
|
||||
with open(comp, "r") as cmp_file:
|
||||
cmp_root = json.load(cmp_file)
|
||||
ref_root = reader.read_file(ref)
|
||||
cmp_root = reader.read_file(comp)
|
||||
|
||||
global all_devices
|
||||
all_devices = cmp_root["devices"]
|
||||
|
||||
108
scripts/nvbench_histogram.py
Normal file
108
scripts/nvbench_histogram.py
Normal file
@@ -0,0 +1,108 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
from nvbench_json import reader
|
||||
|
||||
def parse_files():
    """Collect the JSON result files named on the command line.

    Directory arguments are expanded to every non-empty ``*.json`` file
    they contain; plain arguments are taken as-is. Returns the sorted
    list, or prints help and exits when nothing usable was given.
    """
    usage = "%(prog)s [nvbench.out.json | dir/] ..."
    arg_parser = argparse.ArgumentParser(prog='nvbench_histogram', usage=usage)

    _, positional = arg_parser.parse_known_args()

    collected = []
    for entry in positional:
        if not os.path.isdir(entry):
            collected.append(entry)
            continue
        # Expand a directory into its non-empty .json files.
        for candidate in os.listdir(entry):
            if os.path.splitext(candidate)[1] != ".json":
                continue
            path = os.path.join(entry, candidate)
            if os.path.isfile(path) and os.path.getsize(path) > 0:
                collected.append(path)

    collected.sort()

    if not collected:
        arg_parser.print_help()
        exit(0)

    return collected
|
||||
|
||||
|
||||
def parse_samples_meta(filename, state):
    """Locate the cold-run sample-times sidecar file for one state.

    Returns ``(sample_count, sample_filename)``, or ``(None, None)`` when
    the state has no "nv/json/bin:nv/cold/sample_times" summary.
    """
    summaries = state["summaries"]
    if not summaries:
        return None, None

    summary = None
    for candidate in summaries:
        if candidate["tag"] == "nv/json/bin:nv/cold/sample_times":
            summary = candidate
            break
    if not summary:
        return None, None

    sample_filename = summary["filename"]["value"]

    # If not absolute, the path is relative to the associated .json file:
    if not os.path.isabs(sample_filename):
        sample_filename = os.path.join(os.path.dirname(filename), sample_filename)

    return int(summary["size"]["value"]), sample_filename
|
||||
|
||||
|
||||
def parse_samples(filename, state):
    """Load the raw sample times recorded for one benchmark state.

    Returns an empty list when the state has no sample-times sidecar file.
    """
    count, samples_path = parse_samples_meta(filename, state)
    if not count or not samples_path:
        return []

    with open(samples_path, "rb") as f:
        # Sidecar holds little-endian float32 values.
        samples = np.fromfile(f, "<f4")

    assert (count == len(samples))
    return samples
|
||||
|
||||
|
||||
def to_df(data):
    """Build a DataFrame from a mapping of column name -> sample sequence.

    Each value is wrapped in a Series first so ragged (unequal-length)
    columns are padded with NaN instead of raising.
    """
    columns = {name: pd.Series(values) for name, values in data.items()}
    return pd.DataFrame.from_dict(columns)
|
||||
|
||||
|
||||
def parse_json(filename):
    """Read one NVBench JSON file and gather every state's sample times.

    Returns a DataFrame with one column per "<benchmark> <state>" pair;
    states without recorded samples are skipped.
    """
    json_root = reader.read_file(filename)

    samples_data = {}
    for bench in json_root["benchmarks"]:
        bench_name = bench["name"]
        print("Benchmark: {}".format(bench_name))
        for state in bench["states"]:
            state_name = state["name"]
            print("State: {}".format(state_name))

            samples = parse_samples(filename, state)
            if len(samples) == 0:
                continue

            key = "{} {}".format(bench_name, state_name)
            samples_data[key] = samples

    return to_df(samples_data)
|
||||
|
||||
|
||||
def main():
    """Entry point: plot a KDE of sample times from every input file."""
    frames = []
    for filename in parse_files():
        frames.append(parse_json(filename))
    combined = pd.concat(frames, ignore_index=True)

    # One KDE curve per benchmark/state column, with a rug of raw samples.
    sns.displot(combined, rug=True, kind="kde", fill=True)
    plt.show()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
1
scripts/nvbench_json/.gitignore
vendored
Normal file
1
scripts/nvbench_json/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
__pycache__/*
|
||||
2
scripts/nvbench_json/__init__.py
Normal file
2
scripts/nvbench_json/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
from . import reader
|
||||
from . import version
|
||||
10
scripts/nvbench_json/reader.py
Normal file
10
scripts/nvbench_json/reader.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import json
|
||||
|
||||
from . import version
|
||||
|
||||
|
||||
def read_file(filename):
    """Parse ``filename`` as NVBench JSON and return its root node.

    Emits a stdout warning via ``version.check_file_version`` when the
    file was written with a different JSON schema version.
    """
    with open(filename, "r") as json_file:
        root = json.load(json_file)
    version.check_file_version(filename, root)
    return root
|
||||
26
scripts/nvbench_json/version.py
Normal file
26
scripts/nvbench_json/version.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# JSON schema version this reader understands.
file_version = (1, 0, 0)

file_version_string = "{}.{}.{}".format(*file_version)


def check_file_version(filename, root_node):
    """Warn on stdout when ``root_node`` was written by a different
    NVBench JSON schema version than this reader expects.

    Returns nothing; warnings are purely informational.
    """
    try:
        version_node = root_node["meta"]["version"]["json"]
    except KeyError:
        # Files predating versioning carry no meta/version/json node.
        print("WARNING:")
        print("  {} is written in an older, unversioned format. ".format(filename))
        print("  It may not read correctly.")
        print("  Reader expects JSON file version {}.".format(file_version_string))
        return

    # TODO We could do something fancy here using semantic versioning, but
    # for now just warn on mismatch.
    if version_node["string"] != file_version_string:
        print("WARNING:")
        print("  {} was written using a different NVBench JSON file version."
              .format(filename))
        print("  It may not read correctly.")
        print("  (file version: {} reader version: {})"
              .format(version_node["string"], file_version_string))
|
||||
357
scripts/nvbench_walltime.py
Normal file
357
scripts/nvbench_walltime.py
Normal file
@@ -0,0 +1,357 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import os
|
||||
import sys
|
||||
|
||||
from nvbench_json import reader
|
||||
|
||||
import tabulate
|
||||
|
||||
|
||||
# Parse version string into tuple, "x.y.z" -> (x, y, z)
|
||||
def version_tuple(v):
    """Parse a dotted version string into a tuple, "x.y.z" -> (x, y, z)."""
    return tuple(int(part) for part in v.split("."))
|
||||
|
||||
|
||||
# Installed tabulate version; gates optional features below
# (colalign / "github" tablefmt need >= 0.8.3).
tabulate_version = version_tuple(tabulate.__version__)

# NOTE(review): declared module-global but not referenced anywhere visible
# in this script — confirm whether it can be removed.
all_devices = []
|
||||
|
||||
|
||||
def format_axis_value(axis_value, axis_type):
    """Render an axis value for display according to its declared type.

    int64 values print as plain integers, float64 with 5 significant
    digits; anything else is returned unchanged.
    """
    if axis_type == "int64":
        return "{:d}".format(int(axis_value))
    if axis_type == "float64":
        return "{:.5g}".format(float(axis_value))
    return axis_value
|
||||
|
||||
|
||||
def format_walltime(seconds_in):
    """Format a duration in seconds as ``[hh:][mm:]ss.mmm`` (or ``s.mmm``).

    Hour and minute fields are omitted while zero; seconds drop their
    zero-padding when no larger field is shown.
    """
    h = math.floor(seconds_in / (60 * 60))
    m = math.floor((seconds_in / 60) % 60)
    s = math.floor(seconds_in % 60)
    ms = math.floor((seconds_in * 1000) % 1000)

    show_h = h > 1e-9
    show_m = show_h or m > 1e-9

    pieces = []
    if show_h:
        pieces.append("{:0>2d}:".format(h))
    if show_m:
        pieces.append("{:0>2d}:".format(m))
    pieces.append("{:0>2d}.".format(s) if show_m else "{:d}.".format(s))
    pieces.append("{:0>3d}".format(ms))
    return "".join(pieces)
|
||||
|
||||
|
||||
def format_percentage(percentage):
    """Render a fraction (0..1) as a two-decimal percentage string.

    When there aren't enough samples for a meaningful noise measurement,
    the noise is recorded as infinity. Unfortunately, JSON spec doesn't
    allow for inf, so these get turned into null / None -> "inf" here.
    """
    if percentage is None:
        return "inf"
    return "{:0.2f}%".format(percentage * 100.0)
|
||||
|
||||
|
||||
# Measurement kinds whose "nv/<name>/walltime" summaries this script
# aggregates, and the table column header shown for each.
measure_names = ["cold", "batch", "cupti"]
measure_column_names = {"cold": "Isolated", "batch": "Batch", "cupti": "CUPTI"}
|
||||
|
||||
|
||||
def init_measures():
    """Return a fresh walltime accumulator: 0.0 seconds per measurement."""
    return {name: 0. for name in measure_names}
|
||||
|
||||
|
||||
def get_measures(state):
    """Extract per-measurement walltimes (seconds) from a state's summaries.

    Only measurements that produced an "nv/<name>/walltime" summary appear
    in the returned dict.
    """
    summaries = state["summaries"]
    times = {}
    for name in measure_names:
        tag = "nv/{}/walltime".format(name)
        summary = None
        for candidate in summaries:
            if candidate["tag"] == tag:
                summary = candidate
                break
        if not summary:
            continue

        # The summary's "value" datum carries the walltime as a float64.
        walltime_data = next(d for d in summary["data"] if d["name"] == "value")
        assert(walltime_data["type"] == "float64")
        walltime = float(walltime_data["value"])
        # Coerce falsy (zero) readings to a plain 0.
        times[name] = walltime if walltime else 0.
    return times
|
||||
|
||||
|
||||
def merge_measures(target, src):
    """Accumulate each measurement time from ``src`` into ``target`` in place."""
    for key in src:
        target[key] += src[key]
|
||||
|
||||
|
||||
def sum_measures(measures):
    """Total walltime (seconds) across all measurements."""
    return sum(measures.values(), 0.)
|
||||
|
||||
|
||||
def get_active_measure_names(measures):
    """Names of measurements with a nonzero (> 1 ns) accumulated walltime."""
    return [name for name, time in measures.items() if time > 1e-9]
|
||||
|
||||
|
||||
def append_measure_headers(headers, active=measure_names):
    """Append a display column header for each active measurement, in place."""
    headers.extend(measure_column_names[name] for name in active)
|
||||
|
||||
|
||||
def append_measure_values(row, measures, active=measure_names):
    """Append the formatted walltime of each active measurement to ``row``."""
    row.extend(format_walltime(measures[name]) for name in active)
|
||||
|
||||
|
||||
def consume_file(filename):
    """Summarize one NVBench JSON file.

    Returns ``{"benches": {name: bench_data}, "measures": totals}`` where
    the totals accumulate every benchmark's walltimes.
    """
    file_root = reader.read_file(filename)

    file_measures = init_measures()
    benches = {}
    for bench in file_root["benchmarks"]:
        bench_data = consume_benchmark(bench, file_root)
        merge_measures(file_measures, bench_data["measures"])
        benches[bench["name"]] = bench_data

    return {"benches": benches, "measures": file_measures}
|
||||
|
||||
|
||||
def consume_benchmark(bench, file_root):
    # Summarize one benchmark: accumulate per-state walltimes into totals
    # for the benchmark as a whole and for each axis value.
    # Returns {"axes": ..., "measures": ..., "states": ...}.
    bench_out = {}

    # Initialize axis map
    axes_out = {}
    axes = bench["axes"]
    if axes:
        for axis in axes:
            values_out = {}
            axis_name = axis["name"]
            axis_type = axis["type"]
            for value in axis["values"]:
                if axis_type == "type":
                    # Type axes are keyed by their input string.
                    # NOTE(review): states below key with
                    # format_axis_value(value, "type") instead — confirm
                    # these always match in the JSON schema.
                    value = value["input_string"]
                else:
                    value = format_axis_value(value["value"], axis_type)
                values_out[value] = {"measures": init_measures()}
            axes_out[axis_name] = values_out

    states_out = {}
    bench_measures = init_measures()

    for state in bench["states"]:
        state_name = state["name"]
        # Get walltimes for each measurement:
        state_measures = get_measures(state)
        state_out = {}
        state_out["measures"] = state_measures
        states_out[state_name] = state_out

        # Update the benchmark measures walltimes
        merge_measures(bench_measures, state_measures)

        # Update the axis measurements:
        axis_values = state["axis_values"]
        if axis_values:
            for axis_value in axis_values:
                axis_name = axis_value["name"]
                # Keys must be formatted exactly as in the axis map above.
                value = format_axis_value(axis_value["value"], axis_value["type"])
                merge_measures(axes_out[axis_name][value]["measures"], state_measures)

    bench_out["axes"] = axes_out
    bench_out["measures"] = bench_measures
    bench_out["states"] = states_out
    return bench_out
|
||||
|
||||
|
||||
def print_overview_section(data):
    # Print the top-level "# Walltime Overview" markdown table: the total
    # walltime plus one column per measurement that recorded any time.
    print("# Walltime Overview\n")

    measures = data["measures"]
    active_measures = get_active_measure_names(measures)

    headers = ["Walltime"]
    append_measure_headers(headers, active_measures)

    colalign = ["right"] * len(headers)

    rows = []

    # Single data row: grand total followed by per-measure walltimes.
    row = [format_walltime(sum_measures(measures))]
    append_measure_values(row, measures, active_measures)
    rows.append(row)

    # colalign and github format require tabulate 0.8.3
    if tabulate_version >= (0, 8, 3):
        print(tabulate.tabulate(rows,
                                headers=headers,
                                colalign=colalign,
                                tablefmt="github"))
    else:
        print(tabulate.tabulate(rows,
                                headers=headers,
                                tablefmt="markdown"))

    print()
|
||||
|
||||
|
||||
# append_data_row_lambda args: (row_list, name, items[name])
def print_measures_table(headers, colalign, items, total_measures, append_item_row_lambda):
    # Shared table printer: one row per item plus a trailing "Total" row.
    # `headers` and `colalign` arrive holding only the caller's leading
    # columns and are extended IN PLACE with "%", "Walltime", and one
    # column per active measurement.
    total_time = sum_measures(total_measures)
    active_measures = get_active_measure_names(total_measures)
    num_user_columns = len(headers)

    headers.append("%")
    headers.append("Walltime")
    append_measure_headers(headers, active_measures)

    # All generated columns are right-aligned.
    while len(colalign) < len(headers):
        colalign.append("right")

    rows = []

    for name, item in items.items():
        item_measures = item["measures"]
        item_time = sum_measures(item_measures)

        row = []
        append_item_row_lambda(row, name, item)
        if total_time > 1e-9:
            row.append(format_percentage(item_time / total_time))
        else:
            # Avoid dividing by a ~zero total.
            row.append(format_percentage(0))
        row.append(format_walltime(item_time))
        append_measure_values(row, item_measures, active_measures)
        rows.append(row)

    # Totals:
    row = []
    if num_user_columns != 0:
        row.append("Total")
    while len(row) < num_user_columns:
        row.append("")
    row.append(format_percentage(1))
    row.append(format_walltime(total_time))
    append_measure_values(row, total_measures, active_measures)
    rows.append(row)

    # colalign and github format require tabulate 0.8.3
    if tabulate_version >= (0, 8, 3):
        print(tabulate.tabulate(rows,
                                headers=headers,
                                colalign=colalign,
                                tablefmt="github"))
    else:
        print(tabulate.tabulate(rows,
                                headers=headers,
                                tablefmt="markdown"))
|
||||
|
||||
|
||||
def print_files_section(data):
    """Print the "# Files" summary table, then a section per input file."""
    print("# Files\n")

    def first_column(row, name, item):
        # Leading column is the filename.
        row.append(name)

    print_measures_table(["Filename"], ["left"], data["files"],
                         data["measures"], first_column)
    print()

    for filename, file in data["files"].items():
        print_file_section(filename, file)
|
||||
|
||||
|
||||
def print_file_section(filename, file):
    """Print one file's "## File:" table, then a section per benchmark."""
    print("## File: {}\n".format(filename))

    def first_column(row, name, item):
        # Leading column is the benchmark name.
        row.append(name)

    print_measures_table(["Benchmark"], ["left"], file["benches"],
                         file["measures"], first_column)
    print()

    for bench_name, bench in file["benches"].items():
        print_bench_section(bench_name, bench)
|
||||
|
||||
|
||||
def print_bench_section(bench_name, bench):
    """Print one benchmark's per-configuration table and per-axis tables."""
    print("### Benchmark: {}\n".format(bench_name))

    def first_column(row, name, item):
        # Leading column is the configuration / axis-value name.
        row.append(name)

    # TODO split this up so each axis is a column
    print_measures_table(["Configuration"], ["left"], bench["states"],
                         bench["measures"], first_column)
    print()

    for axis_name, axis in bench["axes"].items():
        print_measures_table(["Axis: " + axis_name], ["left"], axis,
                             bench["measures"], first_column)
        print()
|
||||
|
||||
|
||||
def main():
    """Entry point: aggregate walltimes from every input file or directory
    named on the command line, then print the markdown report."""
    help_text = "%(prog)s [nvbench.out.json | dir/]..."
    parser = argparse.ArgumentParser(prog='nvbench_walltime', usage=help_text)

    _, files_or_dirs = parser.parse_known_args()

    filenames = []
    for entry in files_or_dirs:
        if not os.path.isdir(entry):
            filenames.append(entry)
            continue
        # Expand a directory into its non-empty .json files.
        for f in os.listdir(entry):
            if os.path.splitext(f)[1] != ".json":
                continue
            path = os.path.join(entry, f)
            if os.path.isfile(path) and os.path.getsize(path) > 0:
                filenames.append(path)

    filenames.sort()

    files_out = {}
    measures = init_measures()
    for filename in filenames:
        file_data = consume_file(filename)
        merge_measures(measures, file_data["measures"])
        files_out[filename] = file_data

    data = {"files": files_out, "measures": measures}

    print_overview_section(data)
    print_files_section(data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
32381
scripts/test_cmp.json
32381
scripts/test_cmp.json
File diff suppressed because it is too large
Load Diff
32381
scripts/test_ref.json
32381
scripts/test_ref.json
File diff suppressed because it is too large
Load Diff
@@ -48,12 +48,12 @@ void noisy_bench(nvbench::state &state)
|
||||
});
|
||||
|
||||
const auto measured_mean = static_cast<nvbench::float32_t>(
|
||||
state.get_summary("Average GPU Time (Cold)").get_float64("value"));
|
||||
state.get_summary("nv/cold/time/gpu/mean").get_float64("value"));
|
||||
const auto measured_noise = [&]() {
|
||||
try
|
||||
{
|
||||
return static_cast<nvbench::float32_t>(
|
||||
state.get_summary("GPU Relative Standard Deviation (Cold)")
|
||||
state.get_summary("nv/cold/time/gpu/stdev/relative")
|
||||
.get_float64("value"));
|
||||
}
|
||||
catch (std::invalid_argument &)
|
||||
|
||||
Reference in New Issue
Block a user