Finish off proof of concept level functionality.

Add `measure_hot` and `markdown_format` as detail classes, since
they're still pretty rough and need to be worked into the design.
But for now, they're functional proofs of concept. The formatting is
rough, but demonstrates basics.

Also add launch, exec, and summary functionality. Unlike the detail classes
above, these will be part of the project's public API.
This commit is contained in:
Allison Vacanti
2021-01-01 21:47:20 -05:00
parent 099286867d
commit 93ed09f2b3
16 changed files with 572 additions and 13 deletions

View File

@@ -11,6 +11,8 @@ set(srcs
string_axis.cu
type_axis.cu
detail/markdown_format.cu
detail/measure_hot.cu
detail/state_generator.cu
)

View File

@@ -1,5 +1,7 @@
#pragma once
#include <nvbench/types.cuh>
#include <chrono>
namespace nvbench
@@ -19,7 +21,8 @@ struct cpu_timer
void stop() { m_stop = std::chrono::high_resolution_clock::now(); }
double get_duration()
// In seconds:
[[nodiscard]] nvbench::float64_t get_duration()
{
const auto duration = m_stop - m_start;
const auto ns =

View File

@@ -1,3 +1,5 @@
#pragma once
#include <nvbench/cuda_call.cuh>
#include <cuda_runtime_api.h>
@@ -17,7 +19,7 @@ struct cuda_stream
cuda_stream &operator=(const cuda_stream &) = delete;
cuda_stream &operator=(cuda_stream &&) = default;
operator cudaStream_t() { return m_stream; }
operator cudaStream_t() const { return m_stream; }
private:
cudaStream_t m_stream;

View File

@@ -2,6 +2,8 @@
#include <nvbench/cuda_call.cuh>
#include <nvbench/types.cuh>
#include <cuda_runtime_api.h>
namespace nvbench
@@ -37,7 +39,7 @@ struct cuda_timer
NVBENCH_CUDA_CALL(cudaEventRecord(m_stop, stream));
}
bool ready() const
[[nodiscard]] bool ready() const
{
const cudaError_t state = cudaEventQuery(m_stop);
if (state == cudaErrorNotReady)
@@ -48,7 +50,8 @@ struct cuda_timer
return true;
}
double get_duration() const
// In seconds:
[[nodiscard]] nvbench::float64_t get_duration() const
{
NVBENCH_CUDA_CALL(cudaEventSynchronize(m_stop));
float elapsed_time;

View File

@@ -0,0 +1,175 @@
#include <nvbench/detail/markdown_format.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/benchmark_manager.cuh>
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
#include <fmt/format.h>
#include <algorithm>
#include <cstddef>
#include <functional>
#include <numeric>
#include <string>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>
namespace
{
// Collects string cells column-by-column and renders them as a pipe-delimited
// markdown table. Columns are created on first use and keep their insertion
// order; every column is padded to the widest of its header and its cells.
struct table_builder
{
  // Stores `value` at (`row`, column named `header`), creating the column
  // and/or growing it as needed. Writing to an existing cell overwrites it.
  void add_cell(std::size_t row, const std::string &header, std::string value)
  {
    auto iter = std::find_if(m_columns.begin(),
                             m_columns.end(),
                             [&header](const column &col) {
                               return col.header == header;
                             });

    auto &col = iter == m_columns.end()
                  ? m_columns.emplace_back(
                      column{header, std::vector<std::string>{}, header.size()})
                  : *iter;

    col.max_width = std::max(col.max_width, value.size());
    if (col.rows.size() <= row)
    {
      col.rows.resize(row + 1);
    }
    // Assign unconditionally: the value must not be dropped when the row
    // already exists (e.g. it was created by an earlier, sparser write).
    col.rows[row] = std::move(value);
  }

  // Renders header, divider, and all rows. Non-const because columns are
  // first padded with empty cells so that they all have the same length.
  std::string to_string()
  {
    fmt::memory_buffer buffer;
    this->fix_row_lengths();
    this->print_header(buffer);
    this->print_divider(buffer);
    this->print_rows(buffer);
    return fmt::to_string(buffer);
  }

private:
  struct column
  {
    std::string header;
    std::vector<std::string> rows;
    std::size_t max_width; // widest of the header and all cells seen so far
  };

  // Ensure that each column holds the same number of rows; missing cells
  // become empty strings.
  void fix_row_lengths()
  {
    m_num_rows = 0;
    for (const column &col : m_columns)
    {
      m_num_rows = std::max(m_num_rows, col.rows.size());
    }
    for (column &col : m_columns)
    {
      col.rows.resize(m_num_rows);
    }
  }

  void print_header(fmt::memory_buffer &buffer) const
  {
    fmt::format_to(buffer, "|");
    for (const column &col : m_columns)
    { // centered header, width = max_width
      fmt::format_to(buffer, " {:^{}} |", col.header, col.max_width);
    }
    fmt::format_to(buffer, "\n");
  }

  void print_divider(fmt::memory_buffer &buffer) const
  {
    fmt::format_to(buffer, "|");
    for (const column &col : m_columns)
    { // fill=-, centered, empty string, width = max_width + 2
      fmt::format_to(buffer, "{:-^{}}|", "", col.max_width + 2);
    }
    fmt::format_to(buffer, "\n");
  }

  void print_rows(fmt::memory_buffer &buffer) const
  {
    for (std::size_t row = 0; row < m_num_rows; ++row)
    {
      fmt::format_to(buffer, "|");
      for (const column &col : m_columns)
      { // cell text with default alignment, width = max_width
        fmt::format_to(buffer, " {:{}} |", col.rows[row], col.max_width);
      }
      fmt::format_to(buffer, "\n");
    }
  }

  std::vector<column> m_columns;
  std::size_t m_num_rows{0}; // valid after fix_row_lengths()
};
} // namespace
namespace nvbench
{
namespace detail
{
// Prints one markdown table per registered benchmark to stdout. Each state
// of a benchmark becomes one row; the columns are the state's axis values
// followed by its summaries (headed by "short_name" when a summary has one,
// otherwise by the summary's name).
void markdown_format::print()
{
  // Converts one axis/summary value to its table-cell text.
  auto format_visitor = [](const auto &v) {
    using T = std::decay_t<decltype(v)>;
    if constexpr (std::is_same_v<T, nvbench::float64_t>)
    {
      return fmt::format("{:7.5g}", v);
    }
    else if constexpr (std::is_same_v<T, std::string>)
    {
      return v;
    }
    else
    {
      return fmt::format("{}", v);
    }
  };

  auto &mgr = nvbench::benchmark_manager::get();
  for (const auto &bench_ptr : mgr.get_benchmarks())
  {
    fmt::print("\n# {}\n\n", bench_ptr->get_name());

    std::size_t row = 0;
    table_builder table;

    for (const auto &inner_states : bench_ptr->get_states())
    {
      for (const nvbench::state &state : inner_states)
      {
        const auto &axis_values = state.get_axis_values();
        for (const auto &name : axis_values.get_names())
        {
          std::string value = std::visit(format_visitor,
                                         axis_values.get_value(name));
          table.add_cell(row, name, std::move(value));
        }

        for (const auto &summary : state.get_summaries())
        {
          const std::string &name = summary.has_value("short_name")
                                      ? summary.get_string("short_name")
                                      : summary.get_name();
          std::string value = std::visit(format_visitor,
                                         summary.get_value("value"));
          table.add_cell(row, name, std::move(value));
        }
        row++;
      }
    }

    // The rendered table is data, not a format string: pass it as an
    // argument so that '{' / '}' inside cells are printed verbatim instead
    // of being interpreted (and rejected) by fmt.
    fmt::print("{}", table.to_string());
  }
}
} // namespace detail
} // namespace nvbench

View File

@@ -0,0 +1,17 @@
#pragma once
namespace nvbench::detail
{

// Proof-of-concept output formatter.
struct markdown_format
{
  // Prints a basic summary table to stdout. This was hacked in as a first
  // pass; there is a lot of room for improvement.
  void print();
};

} // namespace nvbench::detail

View File

@@ -0,0 +1,45 @@
#include <nvbench/detail/measure_hot.cuh>
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
namespace nvbench
{
namespace detail
{
void measure_hot_base::generate_summaries()
{
{
auto &summary = m_state.add_summary("Number of Trials (Hot)");
summary.set_string("short_name", "Hot Trials");
summary.set_string("description",
"Number of kernel executions in hot time measurements.");
summary.set_int64("value", m_num_trials);
}
{
auto &summary = m_state.add_summary("Average GPU Time (Hot)");
summary.set_string("hint", "duration");
summary.set_string("short_name", "Hot GPU");
summary.set_string("description",
"Average back-to-back kernel execution time as measured "
"by CUDA events.");
summary.set_float64("value", m_cuda_time / m_num_trials);
}
{
auto &summary = m_state.add_summary("Average CPU Time (Hot)");
summary.set_string("hint", "duration");
summary.set_string("short_name", "Hot CPU");
summary.set_string("description",
"Average back-to-back kernel execution time observed "
"from host.");
summary.set_float64("value", m_cpu_time / m_num_trials);
}
}
} // namespace detail
} // namespace nvbench

View File

@@ -0,0 +1,113 @@
#pragma once
#include <nvbench/cpu_timer.cuh>
#include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_timer.cuh>
#include <nvbench/launch.cuh>
#include <nvbench/state.cuh>
#include <cuda_runtime.h>
#include <utility>
namespace nvbench
{
namespace detail
{
// non-templated code goes here:
// Type-independent part of the hot measurement: owns the launch object, the
// timers, and the accumulators, so that this code does not need to live in
// the measure_hot template.
struct measure_hot_base
{
  // Binds the measurement to `exec_state`; only a reference is kept.
  explicit measure_hot_base(nvbench::state &exec_state)
      : m_state(exec_state)
  {}

  // Neither copyable nor movable.
  measure_hot_base(const measure_hot_base &)            = delete;
  measure_hot_base(measure_hot_base &&)                 = delete;
  measure_hot_base &operator=(const measure_hot_base &) = delete;
  measure_hot_base &operator=(measure_hot_base &&)      = delete;

protected:
  // Clears the accumulated times and the trial count.
  void initialize()
  {
    m_num_trials = 0;
    m_cpu_time   = 0.;
    m_cuda_time  = 0.;
  }

  // Writes the accumulated results into m_state as summaries.
  void generate_summaries();

  nvbench::launch m_launch{};
  nvbench::cuda_timer m_cuda_timer{};
  nvbench::cpu_timer m_cpu_timer{};

  // All times are in seconds.
  nvbench::float64_t m_min_time{1.}; // GPU time to accumulate before stopping
  nvbench::float64_t m_cuda_time{};  // accumulated CUDA-event time
  nvbench::float64_t m_cpu_time{};   // accumulated host-side time

  nvbench::int64_t m_num_trials{}; // kernel launches included in the totals

  nvbench::state &m_state;
};
template <typename KernelLauncher>
struct measure_hot : public measure_hot_base
{
measure_hot(nvbench::state &state, KernelLauncher &kernel_launcher)
: measure_hot_base(state)
, m_kernel_launcher{kernel_launcher}
{}
void operator()()
{
this->initialize();
this->run_warmup();
this->run_trials();
this->generate_summaries();
}
private:
void run_warmup()
{
m_cuda_timer.start(m_launch.get_stream());
this->launch_kernel();
m_cuda_timer.stop(m_launch.get_stream());
}
void run_trials()
{
// Use warmup results to estimate the number of iterations to run:
const auto warmup = m_cuda_timer.get_duration();
auto cur_trials = static_cast<nvbench::int64_t>(m_min_time / warmup);
cur_trials = std::max(cur_trials, 1ll);
do
{
m_cuda_timer.start(m_launch.get_stream());
m_cpu_timer.start();
for (nvbench::int64_t i = 0; i < cur_trials; ++i)
{
this->launch_kernel();
}
m_cuda_timer.stop(m_launch.get_stream());
NVBENCH_CUDA_CALL(cudaStreamSynchronize(m_launch.get_stream()));
m_cpu_timer.stop();
m_cuda_time += m_cuda_timer.get_duration();
m_cpu_time += m_cpu_timer.get_duration();
m_num_trials += cur_trials;
// Predict number of remaining iterations:
cur_trials = (m_min_time - m_cuda_time) / (m_cuda_time / m_num_trials);
} while (cur_trials > 0);
}
// TODO forceinline
void launch_kernel() { m_kernel_launcher(m_launch); }
KernelLauncher &m_kernel_launcher;
};
} // namespace detail
} // namespace nvbench

16
nvbench/exec.cuh Normal file
View File

@@ -0,0 +1,16 @@
#pragma once
#include <nvbench/detail/measure_hot.cuh>
namespace nvbench
{
// Runs the hot measurement for `kernel_launcher` and stores the resulting
// summaries in `exec_state`. The launcher is only referenced, never copied,
// for the duration of the call.
template <typename KernelLauncher>
void exec(nvbench::state &exec_state, KernelLauncher &&kernel_launcher)
{
  // Strip the reference that the forwarding parameter may have deduced.
  using launcher_type = std::remove_reference_t<KernelLauncher>;
  using measurement   = nvbench::detail::measure_hot<launcher_type>;

  measurement run_hot{exec_state, kernel_launcher};
  run_hot();
}
} // namespace nvbench

23
nvbench/launch.cuh Normal file
View File

@@ -0,0 +1,23 @@
#pragma once
#include <nvbench/cuda_stream.cuh>
namespace nvbench
{
// Owns the CUDA stream that is exposed through get_stream().
struct launch
{
  // move-only
  launch()                          = default;
  launch(const launch &)            = delete;
  launch(launch &&)                 = default;
  launch &operator=(const launch &) = delete;
  launch &operator=(launch &&)      = default;

  // Returns the stream owned by this launch object.
  [[nodiscard]] const nvbench::cuda_stream &get_stream() const
  {
    return m_stream;
  }

private:
  nvbench::cuda_stream m_stream;
};
} // namespace nvbench

View File

@@ -2,9 +2,14 @@
#include <nvbench/benchmark_base.cuh>
#include <nvbench/benchmark_manager.cuh>
#include <nvbench/detail/markdown_format.cuh>
#define NVBENCH_MAIN \
int main() { BENCHMARK_MAIN_BODY; }
int main() \
{ \
NVBENCH_MAIN_BODY; \
return 0; \
}
#define NVBENCH_MAIN_BODY \
do \
@@ -14,4 +19,6 @@
{ \
bench_ptr->run(); \
} \
nvbench::detail::markdown_format printer; \
printer.print(); \
} while (false)

View File

@@ -9,6 +9,8 @@
#include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_stream.cuh>
#include <nvbench/cuda_timer.cuh>
#include <nvbench/exec.cuh>
#include <nvbench/launch.cuh>
#include <nvbench/main.cuh>
#include <nvbench/state.cuh>
#include <nvbench/type_list.cuh>

View File

@@ -2,6 +2,10 @@
#include <nvbench/types.cuh>
#include <fmt/format.h>
#include <algorithm>
#include <stdexcept>
#include <string>
namespace nvbench
@@ -22,4 +26,46 @@ const std::string &state::get_string(const std::string &axis_name) const
return m_axis_values.get_string(axis_name);
}
// Appends a new, empty summary called `summary_name` and returns a reference
// to the stored element.
summary &state::add_summary(std::string summary_name)
{
  m_summaries.emplace_back(std::move(summary_name));
  return m_summaries.back();
}
// Moves an already-built summary into this state and returns a reference to
// the stored element.
summary &state::add_summary(summary s)
{
  return m_summaries.emplace_back(std::move(s));
}
// Returns the first summary whose name equals `name`.
// Throws std::runtime_error if no summary has that name.
const summary &state::get_summary(std::string_view name) const
{
  for (const summary &candidate : m_summaries)
  {
    if (candidate.get_name() == name)
    {
      return candidate;
    }
  }

  throw std::runtime_error(
    fmt::format("{}:{}: No summary named '{}'.", __FILE__, __LINE__, name));
}
// Mutable overload: returns the first summary whose name equals `name`.
// Throws std::runtime_error if no summary has that name.
summary &state::get_summary(std::string_view name)
{
  for (summary &candidate : m_summaries)
  {
    if (candidate.get_name() == name)
    {
      return candidate;
    }
  }

  throw std::runtime_error(
    fmt::format("{}:{}: No summary named '{}'.", __FILE__, __LINE__, name));
}
// Read-only access to every summary stored on this state, in insertion order.
const std::vector<summary> &state::get_summaries() const
{
  return m_summaries;
}
// Mutable access to every summary stored on this state, in insertion order.
std::vector<summary> &state::get_summaries()
{
  return m_summaries;
}
} // namespace nvbench

View File

@@ -1,9 +1,11 @@
#pragma once
#include <nvbench/named_values.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/types.cuh>
#include <string>
#include <vector>
namespace nvbench
{
@@ -13,6 +15,20 @@ namespace detail
struct state_generator;
}
/**
* Stores all information about a particular benchmark configuration.
*
* One state object exists for every combination of a benchmark's parameter
* axes. It provides access to:
* - Parameter values (get_int64, get_float64, get_string)
* - The names of parameters from type axes are stored as strings.
* - Skip information (skip, is_skipped, get_skip_reason)
* - If the benchmark fails or is invalid, it may be skipped with an
* informative message.
* - Summaries (add_summary, get_summary, get_summaries)
* - Summaries store measurement information as key/value pairs.
* See nvbench::summary for details.
*/
struct state
{
// move-only
@@ -41,6 +57,13 @@ struct state
return m_axis_values;
}
summary &add_summary(std::string summary_name);
summary &add_summary(summary s);
[[nodiscard]] const summary &get_summary(std::string_view name) const;
[[nodiscard]] summary &get_summary(std::string_view name);
[[nodiscard]] const std::vector<summary> &get_summaries() const;
[[nodiscard]] std::vector<summary> &get_summaries();
protected:
friend struct nvbench::detail::state_generator;
@@ -51,6 +74,7 @@ protected:
{}
nvbench::named_values m_axis_values;
std::vector<nvbench::summary> m_summaries;
std::string m_skip_reason;
};

45
nvbench/summary.cuh Normal file
View File

@@ -0,0 +1,45 @@
#pragma once
#include <nvbench/named_values.cuh>
#include <string>
#include <utility>
namespace nvbench
{
/**
* A named set of key/value pairs associated with a measurement.
*
* The key/value pair functionality is implemented by the `named_values` base
* class.
*
* Some keys have standard meanings that output formats may use to produce
* better representations of the summary.
* @todo TODO fill this out as the format writers develop. These are some ideas:
* - "hint": {"duration", "bandwidth", "bytes", etc.}
* - "fmt_string": "{:9.5f}"
* - "short_name": "%PeakMBW" (Abbreviated name for table headings)
* - "description": "Average global device memory throughput as a percentage of the device's peak bandwidth."
*
* Hints:
* - "hint" unset: Arbitrary value is stored in a key named "value".
* - "hint" == "duration":
* - "value" is a float64_t with the mean elapsed time in seconds.
* - Additional optional float64_t keys: "min", "max", "stdev"
*/
struct summary : public nvbench::named_values
{
  // Creates a summary with an empty name and no key/value pairs.
  summary() = default;

  // Creates a summary called `name` with no key/value pairs.
  explicit summary(std::string name)
      : m_name{std::move(name)}
  {}

  // Replaces the summary's name.
  void set_name(std::string name)
  {
    m_name = std::move(name);
  }

  // Returns the summary's name.
  const std::string &get_name() const
  {
    return m_name;
  }

private:
  std::string m_name;
};
} // namespace nvbench

View File

@@ -1,5 +1,6 @@
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
@@ -23,14 +24,49 @@ struct state_tester : public nvbench::state
void test_params()
{
// Build a state param by param
state_tester state1;
state1.set_param("TestInt", nvbench::int64_t{22});
state1.set_param("TestFloat", nvbench::float64_t{3.14});
state1.set_param("TestString", "A String!");
state_tester state;
state.set_param("TestInt", nvbench::int64_t{22});
state.set_param("TestFloat", nvbench::float64_t{3.14});
state.set_param("TestString", "A String!");
ASSERT(state1.get_int64("TestInt") == nvbench::int64_t{22});
ASSERT(state1.get_float64("TestFloat") == nvbench::float64_t{3.14});
ASSERT(state1.get_string("TestString") == "A String!");
ASSERT(state.get_int64("TestInt") == nvbench::int64_t{22});
ASSERT(state.get_float64("TestFloat") == nvbench::float64_t{3.14});
ASSERT(state.get_string("TestString") == "A String!");
}
int main() { test_params(); }
void test_summaries()
{
state_tester state;
ASSERT(state.get_summaries().size() == 0);
{
nvbench::summary& summary = state.add_summary("Test Summary1");
summary.set_float64("Float", 3.14);
summary.set_int64("Int", 128);
summary.set_string("String", "str");
}
ASSERT(state.get_summaries().size() == 1);
ASSERT(state.get_summary("Test Summary1").get_size() == 3);
ASSERT(state.get_summary("Test Summary1").get_float64("Float") == 3.14);
ASSERT(state.get_summary("Test Summary1").get_int64("Int") == 128);
ASSERT(state.get_summary("Test Summary1").get_string("String") == "str");
{
nvbench::summary summary{"Test Summary2"};
state.add_summary(std::move(summary));
}
ASSERT(state.get_summaries().size() == 2);
ASSERT(state.get_summary("Test Summary1").get_size() == 3);
ASSERT(state.get_summary("Test Summary1").get_float64("Float") == 3.14);
ASSERT(state.get_summary("Test Summary1").get_int64("Int") == 128);
ASSERT(state.get_summary("Test Summary1").get_string("String") == "str");
ASSERT(state.get_summary("Test Summary2").get_size() == 0);
}
// Test driver: runs each test function in turn.
int main()
{
  test_params();
  test_summaries();
  return 0;
}