From 8f1152d4a22287a35be2dde596e3cf86ace8054a Mon Sep 17 00:00:00 2001 From: clang-format <> Date: Thu, 18 Aug 2022 22:09:24 +0400 Subject: [PATCH] Auto format --- nvbench/axes_metadata.cuh | 28 ++- nvbench/axes_metadata.cxx | 33 ++-- nvbench/axis_base.cuh | 5 +- nvbench/axis_base.cxx | 5 +- nvbench/benchmark.cuh | 8 +- nvbench/benchmark_base.cuh | 48 ++---- nvbench/benchmark_base.cxx | 3 +- nvbench/benchmark_manager.cuh | 19 +-- nvbench/benchmark_manager.cxx | 19 +-- nvbench/blocking_kernel.cu | 90 +++++----- nvbench/blocking_kernel.cuh | 6 +- nvbench/callable.cuh | 42 ++--- nvbench/cpu_timer.cuh | 19 +-- nvbench/create.cuh | 24 ++- nvbench/csv_printer.cu | 29 +--- nvbench/cuda_call.cu | 8 +- nvbench/cuda_call.cuh | 59 +++---- nvbench/cuda_stream.cuh | 4 +- nvbench/cuda_timer.cuh | 6 +- nvbench/cupti_profiler.cuh | 11 +- nvbench/cupti_profiler.cxx | 159 +++++++----------- nvbench/detail/device_scope.cuh | 6 +- .../detail/kernel_launcher_timer_wrapper.cuh | 2 +- nvbench/detail/l2flush.cuh | 7 +- nvbench/detail/measure_cold.cu | 66 +++----- nvbench/detail/measure_cold.cuh | 11 +- nvbench/detail/measure_cupti.cu | 50 ++---- nvbench/detail/measure_cupti.cuh | 11 +- nvbench/detail/measure_hot.cu | 16 +- nvbench/detail/measure_hot.cuh | 11 +- nvbench/detail/ring_buffer.cuh | 10 +- nvbench/detail/state_exec.cuh | 28 ++- nvbench/detail/state_generator.cxx | 85 ++++------ nvbench/detail/statistics.cuh | 3 +- nvbench/detail/throw.cuh | 22 ++- nvbench/detail/transform_reduce.cuh | 5 +- nvbench/detail/type_list_impl.cuh | 32 ++-- nvbench/device_info.cu | 36 ++-- nvbench/device_info.cuh | 81 ++------- nvbench/device_manager.cuh | 25 +-- nvbench/enum_type_list.cuh | 25 ++- nvbench/exec_tag.cuh | 17 +- nvbench/flags.cuh | 40 ++--- nvbench/float64_axis.cuh | 15 +- nvbench/git_revision.cuh | 36 ++-- nvbench/int64_axis.cuh | 23 +-- nvbench/int64_axis.cxx | 5 +- nvbench/internal/markdown_table.cuh | 12 +- nvbench/internal/nvml.cuh | 41 ++--- nvbench/internal/table_builder.cuh | 24 +-- nvbench/json_printer.cu | 66 +++----- nvbench/json_printer.cuh | 23 +-- nvbench/launch.cuh | 11 +- nvbench/main.cuh | 92 +++++----- nvbench/markdown_printer.cu | 84 +++------ nvbench/named_values.cuh | 9 +- nvbench/named_values.cxx | 47 ++---- nvbench/option_parser.cu | 133 ++++----------- nvbench/option_parser.cuh | 19 +-- nvbench/printer_base.cuh | 37 ++-- nvbench/printer_base.cxx | 15 +- nvbench/printer_multiplex.cuh | 14 +- nvbench/printer_multiplex.cxx | 12 +- nvbench/range.cuh | 8 +- nvbench/runner.cuh | 59 +++---- nvbench/runner.cxx | 13 +- nvbench/state.cuh | 124 ++++---------- nvbench/state.cxx | 74 ++++---- nvbench/string_axis.cuh | 20 +-- nvbench/summary.cuh | 6 +- nvbench/test_kernels.cuh | 10 +- nvbench/type_axis.cuh | 20 +-- nvbench/type_axis.cxx | 15 +- nvbench/type_strings.cuh | 33 ++-- 74 files changed, 835 insertions(+), 1479 deletions(-) diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index 353855a..2663191 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -41,8 +41,8 @@ struct axes_metadata template explicit axes_metadata(nvbench::type_list); - axes_metadata() = default; - axes_metadata(axes_metadata &&) = default; + axes_metadata() = default; + axes_metadata(axes_metadata &&) = default; axes_metadata &operator=(axes_metadata &&) = default; axes_metadata(const axes_metadata &); @@ -58,20 +58,16 @@ struct axes_metadata void add_string_axis(std::string name, std::vector data); - [[nodiscard]] const nvbench::int64_axis & - get_int64_axis(std::string_view name) const; + [[nodiscard]] const nvbench::int64_axis &get_int64_axis(std::string_view name) const; [[nodiscard]] nvbench::int64_axis &get_int64_axis(std::string_view name); - [[nodiscard]] const nvbench::float64_axis & - get_float64_axis(std::string_view name) const; + [[nodiscard]] const nvbench::float64_axis &get_float64_axis(std::string_view name) const; [[nodiscard]] nvbench::float64_axis &get_float64_axis(std::string_view name); - [[nodiscard]] const nvbench::string_axis & - get_string_axis(std::string_view name) const; + [[nodiscard]] const nvbench::string_axis &get_string_axis(std::string_view name) const; [[nodiscard]] nvbench::string_axis &get_string_axis(std::string_view name); - [[nodiscard]] const nvbench::type_axis & - get_type_axis(std::string_view name) const; + [[nodiscard]] const nvbench::type_axis &get_type_axis(std::string_view name) const; [[nodiscard]] nvbench::type_axis &get_type_axis(std::string_view name); [[nodiscard]] const nvbench::type_axis &get_type_axis(std::size_t index) const; @@ -83,10 +79,9 @@ struct axes_metadata [[nodiscard]] const nvbench::axis_base &get_axis(std::string_view name) const; [[nodiscard]] nvbench::axis_base &get_axis(std::string_view name); - [[nodiscard]] const nvbench::axis_base & - get_axis(std::string_view name, nvbench::axis_type type) const; - [[nodiscard]] nvbench::axis_base &get_axis(std::string_view name, - nvbench::axis_type type); + [[nodiscard]] const nvbench::axis_base &get_axis(std::string_view name, + nvbench::axis_type type) const; + [[nodiscard]] nvbench::axis_base &get_axis(std::string_view name, nvbench::axis_type type); [[nodiscard]] static std::vector generate_default_type_axis_names(std::size_t num_type_axes); @@ -101,7 +96,7 @@ axes_metadata::axes_metadata(nvbench::type_list) { using type_axes_list = nvbench::type_list; constexpr auto num_type_axes = nvbench::tl::size::value; - auto names = axes_metadata::generate_default_type_axis_names(num_type_axes); + auto names = axes_metadata::generate_default_type_axis_names(num_type_axes); auto names_iter = names.begin(); // contents will be moved from nvbench::tl::foreach( @@ -114,8 +109,7 @@ axes_metadata::axes_metadata(nvbench::type_list) // The word "type" appears 6 times in the next line. // Every. Single. Token. typedef typename decltype(wrapped_type)::type type_list; - auto axis = std::make_unique(std::move(*names_iter++), - type_axis_index); + auto axis = std::make_unique(std::move(*names_iter++), type_axis_index); axis->template set_inputs(); axes.push_back(std::move(axis)); }); diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index 044bc91..ef51a96 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -64,9 +64,7 @@ try auto &axis = *m_axes[i]; if (axis.get_type() != nvbench::axis_type::type) { - NVBENCH_THROW(std::runtime_error, - "Number of names exceeds number of type axes ({})", - i); + NVBENCH_THROW(std::runtime_error, "Number of names exceeds number of type axes ({})", i); } axis.set_name(std::move(names[i])); @@ -81,8 +79,7 @@ catch (std::exception &e) names); } -void axes_metadata::add_float64_axis(std::string name, - std::vector data) +void axes_metadata::add_float64_axis(std::string name, std::vector data) { auto axis = std::make_unique(std::move(name)); axis->set_inputs(std::move(data)); @@ -98,8 +95,7 @@ void axes_metadata::add_int64_axis(std::string name, m_axes.push_back(std::move(axis)); } -void axes_metadata::add_string_axis(std::string name, - std::vector data) +void axes_metadata::add_string_axis(std::string name, std::vector data) { auto axis = std::make_unique(std::move(name)); axis->set_inputs(std::move(data)); @@ -188,10 +184,9 @@ nvbench::type_axis &axes_metadata::get_type_axis(std::size_t index) const axis_base &axes_metadata::get_axis(std::string_view name) const { - auto iter = - std::find_if(m_axes.cbegin(), m_axes.cend(), [&name](const auto &axis) { - return axis->get_name() == name; - }); + auto iter = std::find_if(m_axes.cbegin(), m_axes.cend(), [&name](const auto &axis) { + return axis->get_name() == name; + }); if (iter == m_axes.cend()) { @@ -203,10 +198,9 @@ const axis_base &axes_metadata::get_axis(std::string_view name) const axis_base &axes_metadata::get_axis(std::string_view name) { - auto iter = - std::find_if(m_axes.begin(), m_axes.end(), [&name](const auto &axis) { - return axis->get_name() == name; - }); + auto iter = std::find_if(m_axes.begin(), m_axes.end(), [&name](const auto &axis) { + return axis->get_name() == name; + }); if (iter == m_axes.end()) { @@ -216,8 +210,7 @@ axis_base &axes_metadata::get_axis(std::string_view name) return **iter; } -const axis_base &axes_metadata::get_axis(std::string_view name, - nvbench::axis_type type) const +const axis_base &axes_metadata::get_axis(std::string_view name, nvbench::axis_type type) const { const auto &axis = this->get_axis(name); if (axis.get_type() != type) @@ -231,8 +224,7 @@ const axis_base &axes_metadata::get_axis(std::string_view name, return axis; } -axis_base &axes_metadata::get_axis(std::string_view name, - nvbench::axis_type type) +axis_base &axes_metadata::get_axis(std::string_view name, nvbench::axis_type type) { auto &axis = this->get_axis(name); if (axis.get_type() != type) @@ -246,8 +238,7 @@ axis_base &axes_metadata::get_axis(std::string_view name, return axis; } -std::vector -axes_metadata::generate_default_type_axis_names(std::size_t num_type_axes) +std::vector axes_metadata::generate_default_type_axis_names(std::size_t num_type_axes) { switch (num_type_axes) { diff --git a/nvbench/axis_base.cuh b/nvbench/axis_base.cuh index 712172f..85d92c7 100644 --- a/nvbench/axis_base.cuh +++ b/nvbench/axis_base.cuh @@ -47,10 +47,7 @@ struct axis_base [[nodiscard]] axis_type get_type() const { return m_type; } - [[nodiscard]] std::string_view get_type_as_string() const - { - return axis_type_to_string(m_type); - } + [[nodiscard]] std::string_view get_type_as_string() const { return axis_type_to_string(m_type); } [[nodiscard]] std::string_view get_flags_as_string() const { diff --git a/nvbench/axis_base.cxx b/nvbench/axis_base.cxx index 6d0bd4d..166f1ba 100644 --- a/nvbench/axis_base.cxx +++ b/nvbench/axis_base.cxx @@ -23,9 +23,6 @@ namespace nvbench axis_base::~axis_base() = default; -std::unique_ptr axis_base::clone() const -{ - return this->do_clone(); -} +std::unique_ptr axis_base::clone() const { return this->do_clone(); } } // namespace nvbench diff --git a/nvbench/benchmark.cuh b/nvbench/benchmark.cuh index 5e050d1..a226070 100644 --- a/nvbench/benchmark.cuh +++ b/nvbench/benchmark.cuh @@ -57,18 +57,14 @@ struct benchmark final : public benchmark_base using type_axes = TypeAxes; using type_configs = nvbench::tl::cartesian_product; - static constexpr std::size_t num_type_configs = - nvbench::tl::size{}; + static constexpr std::size_t num_type_configs = nvbench::tl::size{}; benchmark() : benchmark_base(type_axes{}) {} private: - std::unique_ptr do_clone() const final - { - return std::make_unique(); - } + std::unique_ptr do_clone() const final { return std::make_unique(); } void do_set_type_axes_names(std::vector names) final { diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index 3a16408..723afcd 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -80,32 +80,28 @@ struct benchmark_base return *this; } - benchmark_base &add_float64_axis(std::string name, - std::vector data) + benchmark_base &add_float64_axis(std::string name, std::vector data) { m_axes.add_float64_axis(std::move(name), std::move(data)); return *this; } - benchmark_base &add_int64_axis( - std::string name, - std::vector data, - nvbench::int64_axis_flags flags = nvbench::int64_axis_flags::none) + benchmark_base &add_int64_axis(std::string name, + std::vector data, + nvbench::int64_axis_flags flags = nvbench::int64_axis_flags::none) { m_axes.add_int64_axis(std::move(name), std::move(data), flags); return *this; } - benchmark_base &add_int64_power_of_two_axis(std::string name, - std::vector data) + benchmark_base &add_int64_power_of_two_axis(std::string name, std::vector data) { return this->add_int64_axis(std::move(name), std::move(data), nvbench::int64_axis_flags::power_of_two); } - benchmark_base &add_string_axis(std::string name, - std::vector data) + benchmark_base &add_string_axis(std::string name, std::vector data) { m_axes.add_string_axis(std::move(name), std::move(data)); return *this; @@ -133,48 +129,30 @@ struct benchmark_base return *this; } - [[nodiscard]] const std::vector &get_devices() const - { - return m_devices; - } + [[nodiscard]] const std::vector &get_devices() const { return m_devices; } [[nodiscard]] nvbench::axes_metadata &get_axes() { return m_axes; } - [[nodiscard]] const nvbench::axes_metadata &get_axes() const - { - return m_axes; - } + [[nodiscard]] const nvbench::axes_metadata &get_axes() const { return m_axes; } // Computes the number of configs in the benchmark. // Unlike get_states().size(), this method may be used prior to calling run(). [[nodiscard]] std::size_t get_config_count() const; // Is empty until run() is called. - [[nodiscard]] const std::vector &get_states() const - { - return m_states; - } + [[nodiscard]] const std::vector &get_states() const { return m_states; } [[nodiscard]] std::vector &get_states() { return m_states; } void run() { this->do_run(); } - void set_printer(nvbench::printer_base &printer) - { - m_printer = std::ref(printer); - } + void set_printer(nvbench::printer_base &printer) { m_printer = std::ref(printer); } void clear_printer() { m_printer = std::nullopt; } - [[nodiscard]] optional_ref get_printer() const - { - return m_printer; - } + [[nodiscard]] optional_ref get_printer() const { return m_printer; } /// Execute at least this many trials per measurement. @{ - [[nodiscard]] nvbench::int64_t get_min_samples() const - { - return m_min_samples; - } + [[nodiscard]] nvbench::int64_t get_min_samples() const { return m_min_samples; } benchmark_base &set_min_samples(nvbench::int64_t min_samples) { m_min_samples = min_samples; @@ -193,7 +171,7 @@ struct benchmark_base } /// @} - /// If true, the benchmark does not use the blocking_kernel. This is intended + /// If true, the benchmark does not use the blocking_kernel. This is intended /// for use with external profiling tools. @{ [[nodiscard]] bool get_disable_blocking_kernel() const { return m_disable_blocking_kernel; } benchmark_base &set_disable_blocking_kernel(bool v) diff --git a/nvbench/benchmark_base.cxx b/nvbench/benchmark_base.cxx index 2d08fdb..5121a52 100644 --- a/nvbench/benchmark_base.cxx +++ b/nvbench/benchmark_base.cxx @@ -68,8 +68,7 @@ std::size_t benchmark_base::get_config_count() const std::size_t{1}, std::multiplies<>{}, [](const auto &axis_ptr) { - if (const auto *type_axis_ptr = - dynamic_cast(axis_ptr.get()); + if (const auto *type_axis_ptr = dynamic_cast(axis_ptr.get()); type_axis_ptr != nullptr) { return type_axis_ptr->get_active_count(); diff --git a/nvbench/benchmark_manager.cuh b/nvbench/benchmark_manager.cuh index 39b1717..7316445 100644 --- a/nvbench/benchmark_manager.cuh +++ b/nvbench/benchmark_manager.cuh @@ -31,8 +31,7 @@ namespace nvbench */ struct benchmark_manager { - using benchmark_vector = - std::vector>; + using benchmark_vector = std::vector>; /** * @return The singleton benchmark_manager instance. @@ -53,25 +52,21 @@ struct benchmark_manager * Get a non-mutable reference to benchmark with the specified name/index. * @{ */ - [[nodiscard]] const benchmark_base & - get_benchmark(const std::string &name) const; + [[nodiscard]] const benchmark_base &get_benchmark(const std::string &name) const; [[nodiscard]] const benchmark_base &get_benchmark(std::size_t idx) const { return *m_benchmarks.at(idx); } /**@}*/ - [[nodiscard]] const benchmark_vector &get_benchmarks() const - { - return m_benchmarks; - }; + [[nodiscard]] const benchmark_vector &get_benchmarks() const { return m_benchmarks; }; private: - benchmark_manager() = default; - benchmark_manager(const benchmark_manager &) = delete; - benchmark_manager(benchmark_manager &&) = delete; + benchmark_manager() = default; + benchmark_manager(const benchmark_manager &) = delete; + benchmark_manager(benchmark_manager &&) = delete; benchmark_manager &operator=(const benchmark_manager &) = delete; - benchmark_manager &operator=(benchmark_manager &&) = delete; + benchmark_manager &operator=(benchmark_manager &&) = delete; benchmark_vector m_benchmarks; }; diff --git a/nvbench/benchmark_manager.cxx b/nvbench/benchmark_manager.cxx index 2a0ca60..cd84f61 100644 --- a/nvbench/benchmark_manager.cxx +++ b/nvbench/benchmark_manager.cxx @@ -43,21 +43,18 @@ benchmark_base &benchmark_manager::add(std::unique_ptr bench) benchmark_manager::benchmark_vector benchmark_manager::clone_benchmarks() const { benchmark_vector result(m_benchmarks.size()); - std::transform(m_benchmarks.cbegin(), - m_benchmarks.cend(), - result.begin(), - [](const auto &bench) { return bench->clone(); }); + std::transform(m_benchmarks.cbegin(), m_benchmarks.cend(), result.begin(), [](const auto &bench) { + return bench->clone(); + }); return result; } -const benchmark_base & -benchmark_manager::get_benchmark(const std::string &name) const +const benchmark_base &benchmark_manager::get_benchmark(const std::string &name) const { - auto iter = std::find_if(m_benchmarks.cbegin(), - m_benchmarks.cend(), - [&name](const auto &bench_ptr) { - return bench_ptr->get_name() == name; - }); + auto iter = + std::find_if(m_benchmarks.cbegin(), m_benchmarks.cend(), [&name](const auto &bench_ptr) { + return bench_ptr->get_name() == name; + }); if (iter == m_benchmarks.cend()) { NVBENCH_THROW(std::out_of_range, "No benchmark named '{}'.", name); diff --git a/nvbench/blocking_kernel.cu b/nvbench/blocking_kernel.cu index 1ee5855..f347833 100644 --- a/nvbench/blocking_kernel.cu +++ b/nvbench/blocking_kernel.cu @@ -42,8 +42,8 @@ __global__ void block_stream(const volatile nvbench::int32_t *flag, nvbench::float64_t timeout) { const auto start_point = cuda::std::chrono::high_resolution_clock::now(); - const auto timeout_ns = cuda::std::chrono::nanoseconds( - static_cast(timeout * 1e9)); + const auto timeout_ns = + cuda::std::chrono::nanoseconds(static_cast(timeout * 1e9)); const auto timeout_point = start_point + timeout_ns; const bool use_timeout = timeout >= 0.; @@ -57,41 +57,40 @@ __global__ void block_stream(const volatile nvbench::int32_t *flag, { *timeout_flag = 1; __threadfence_system(); // Ensure timeout flag visibility on host. - printf( - "\n" - "######################################################################\n" - "##################### Possible Deadlock Detected #####################\n" - "######################################################################\n" - "\n" - "Forcing unblock: The current measurement appears to have deadlocked\n" - "and the results cannot be trusted.\n" - "\n" - "This happens when the KernelLauncher synchronizes the CUDA device.\n" - "If this is the case, pass the `sync` exec_tag to the `exec` call:\n" - "\n" - " state.exec(); // Deadlock\n" - " state.exec(nvbench::exec_tag::sync, ); // Safe\n" - "\n" - "This tells NVBench about the sync so it can run the benchmark safely.\n" - "\n" - "If the KernelLauncher does not synchronize but has a very long \n" - "execution time, this may be a false positive. If so, disable this\n" - "check with:\n" - "\n" - " state.set_blocking_kernel_timeout(-1);\n" - "\n" - "The current timeout is set to %0.5g seconds.\n" - "\n" - "For more information, see the 'Benchmarks that sync' section of the\n" - "NVBench documentation.\n" - "\n" - "If this happens while profiling with an external tool,\n" - "pass the `--disable-blocking-kernel` flag or the `--profile` flag\n" - "(to also only run the benchmark once) to the executable.\n" - "\n" - "For more information, see the 'Benchmark Properties' section of the\n" - "NVBench documentation.\n\n", - timeout); + printf("\n" + "######################################################################\n" + "##################### Possible Deadlock Detected #####################\n" + "######################################################################\n" + "\n" + "Forcing unblock: The current measurement appears to have deadlocked\n" + "and the results cannot be trusted.\n" + "\n" + "This happens when the KernelLauncher synchronizes the CUDA device.\n" + "If this is the case, pass the `sync` exec_tag to the `exec` call:\n" + "\n" + " state.exec(); // Deadlock\n" + " state.exec(nvbench::exec_tag::sync, ); // Safe\n" + "\n" + "This tells NVBench about the sync so it can run the benchmark safely.\n" + "\n" + "If the KernelLauncher does not synchronize but has a very long \n" + "execution time, this may be a false positive. If so, disable this\n" + "check with:\n" + "\n" + " state.set_blocking_kernel_timeout(-1);\n" + "\n" + "The current timeout is set to %0.5g seconds.\n" + "\n" + "For more information, see the 'Benchmarks that sync' section of the\n" + "NVBench documentation.\n" + "\n" + "If this happens while profiling with an external tool,\n" + "pass the `--disable-blocking-kernel` flag or the `--profile` flag\n" + "(to also only run the benchmark once) to the executable.\n" + "\n" + "For more information, see the 'Benchmark Properties' section of the\n" + "NVBench documentation.\n\n", + timeout); } } @@ -102,15 +101,11 @@ namespace nvbench blocking_kernel::blocking_kernel() { - NVBENCH_CUDA_CALL(cudaHostRegister(&m_host_flag, - sizeof(m_host_flag), - cudaHostRegisterMapped)); + NVBENCH_CUDA_CALL(cudaHostRegister(&m_host_flag, sizeof(m_host_flag), cudaHostRegisterMapped)); NVBENCH_CUDA_CALL(cudaHostGetDevicePointer(&m_device_flag, &m_host_flag, 0)); - NVBENCH_CUDA_CALL(cudaHostRegister(&m_host_timeout_flag, - sizeof(m_host_timeout_flag), - cudaHostRegisterMapped)); NVBENCH_CUDA_CALL( - cudaHostGetDevicePointer(&m_device_timeout_flag, &m_host_timeout_flag, 0)); + cudaHostRegister(&m_host_timeout_flag, sizeof(m_host_timeout_flag), cudaHostRegisterMapped)); + NVBENCH_CUDA_CALL(cudaHostGetDevicePointer(&m_device_timeout_flag, &m_host_timeout_flag, 0)); } blocking_kernel::~blocking_kernel() @@ -119,14 +114,11 @@ blocking_kernel::~blocking_kernel() NVBENCH_CUDA_CALL_NOEXCEPT(cudaHostUnregister(&m_host_timeout_flag)); } -void blocking_kernel::block(const nvbench::cuda_stream &stream, - nvbench::float64_t timeout) +void blocking_kernel::block(const nvbench::cuda_stream &stream, nvbench::float64_t timeout) { m_host_flag = 0; m_host_timeout_flag = 0; - block_stream<<<1, 1, 0, stream>>>(m_device_flag, - m_device_timeout_flag, - timeout); + block_stream<<<1, 1, 0, stream>>>(m_device_flag, m_device_timeout_flag, timeout); } void blocking_kernel::timeout_detected() diff --git a/nvbench/blocking_kernel.cuh b/nvbench/blocking_kernel.cuh index ecbfed8..13f737e 100644 --- a/nvbench/blocking_kernel.cuh +++ b/nvbench/blocking_kernel.cuh @@ -97,10 +97,10 @@ struct blocking_kernel } // move-only - blocking_kernel(const blocking_kernel &) = delete; - blocking_kernel(blocking_kernel &&) = default; + blocking_kernel(const blocking_kernel &) = delete; + blocking_kernel(blocking_kernel &&) = default; blocking_kernel &operator=(const blocking_kernel &) = delete; - blocking_kernel &operator=(blocking_kernel &&) = default; + blocking_kernel &operator=(blocking_kernel &&) = default; private: nvbench::int32_t m_host_flag{}; diff --git a/nvbench/callable.cuh b/nvbench/callable.cuh index ce7fff0..2cd1f15 100644 --- a/nvbench/callable.cuh +++ b/nvbench/callable.cuh @@ -30,35 +30,29 @@ struct state; // Define a simple callable wrapper around a function. This allows the function // to be used as a class template parameter. Intended for use with kernel // generators and `NVBENCH_BENCH` macros. -#define NVBENCH_DEFINE_UNIQUE_CALLABLE(function) \ +#define NVBENCH_DEFINE_UNIQUE_CALLABLE(function) \ NVBENCH_DEFINE_CALLABLE(function, NVBENCH_UNIQUE_IDENTIFIER(function)) -#define NVBENCH_DEFINE_CALLABLE(function, callable_name) \ - struct callable_name \ - { \ - void operator()(nvbench::state &state, nvbench::type_list<>) \ - { \ - function(state); \ - } \ +#define NVBENCH_DEFINE_CALLABLE(function, callable_name) \ + struct callable_name \ + { \ + void operator()(nvbench::state &state, nvbench::type_list<>) { function(state); } \ } -#define NVBENCH_DEFINE_UNIQUE_CALLABLE_TEMPLATE(function) \ - NVBENCH_DEFINE_CALLABLE_TEMPLATE(function, \ - NVBENCH_UNIQUE_IDENTIFIER(function)) +#define NVBENCH_DEFINE_UNIQUE_CALLABLE_TEMPLATE(function) \ + NVBENCH_DEFINE_CALLABLE_TEMPLATE(function, NVBENCH_UNIQUE_IDENTIFIER(function)) -#define NVBENCH_DEFINE_CALLABLE_TEMPLATE(function, callable_name) \ - struct callable_name \ - { \ - template \ - void operator()(nvbench::state &state, nvbench::type_list) \ - { \ - function(state, nvbench::type_list{}); \ - } \ +#define NVBENCH_DEFINE_CALLABLE_TEMPLATE(function, callable_name) \ + struct callable_name \ + { \ + template \ + void operator()(nvbench::state &state, nvbench::type_list) \ + { \ + function(state, nvbench::type_list{}); \ + } \ } -#define NVBENCH_UNIQUE_IDENTIFIER(prefix) \ - NVBENCH_UNIQUE_IDENTIFIER_IMPL1(prefix, __LINE__) -#define NVBENCH_UNIQUE_IDENTIFIER_IMPL1(prefix, unique_id) \ +#define NVBENCH_UNIQUE_IDENTIFIER(prefix) NVBENCH_UNIQUE_IDENTIFIER_IMPL1(prefix, __LINE__) +#define NVBENCH_UNIQUE_IDENTIFIER_IMPL1(prefix, unique_id) \ NVBENCH_UNIQUE_IDENTIFIER_IMPL2(prefix, unique_id) -#define NVBENCH_UNIQUE_IDENTIFIER_IMPL2(prefix, unique_id) \ - prefix##_line_##unique_id +#define NVBENCH_UNIQUE_IDENTIFIER_IMPL2(prefix, unique_id) prefix##_line_##unique_id diff --git a/nvbench/cpu_timer.cuh b/nvbench/cpu_timer.cuh index 09d3c54..d4ba655 100644 --- a/nvbench/cpu_timer.cuh +++ b/nvbench/cpu_timer.cuh @@ -30,27 +30,20 @@ struct cpu_timer __forceinline__ cpu_timer() = default; // move-only - cpu_timer(const cpu_timer &) = delete; - cpu_timer(cpu_timer &&) = default; + cpu_timer(const cpu_timer &) = delete; + cpu_timer(cpu_timer &&) = default; cpu_timer &operator=(const cpu_timer &) = delete; - cpu_timer &operator=(cpu_timer &&) = default; + cpu_timer &operator=(cpu_timer &&) = default; - __forceinline__ void start() - { - m_start = std::chrono::high_resolution_clock::now(); - } + __forceinline__ void start() { m_start = std::chrono::high_resolution_clock::now(); } - __forceinline__ void stop() - { - m_stop = std::chrono::high_resolution_clock::now(); - } + __forceinline__ void stop() { m_stop = std::chrono::high_resolution_clock::now(); } // In seconds: [[nodiscard]] __forceinline__ nvbench::float64_t get_duration() { const auto duration = m_stop - m_start; - const auto ns = - std::chrono::duration_cast(duration).count(); + const auto ns = std::chrono::duration_cast(duration).count(); return static_cast(ns) * (1e-9); } diff --git a/nvbench/create.cuh b/nvbench/create.cuh index 7aed1b7..902d6c3 100644 --- a/nvbench/create.cuh +++ b/nvbench/create.cuh @@ -27,19 +27,17 @@ #define NVBENCH_TYPE_AXES(...) nvbench::type_list<__VA_ARGS__> -#define NVBENCH_BENCH(KernelGenerator) \ - NVBENCH_DEFINE_UNIQUE_CALLABLE(KernelGenerator); \ - nvbench::benchmark_base &NVBENCH_UNIQUE_IDENTIFIER(obj_##KernelGenerator) = \ - nvbench::benchmark_manager::get() \ - .add(std::make_unique< \ - nvbench::benchmark>()) \ +#define NVBENCH_BENCH(KernelGenerator) \ + NVBENCH_DEFINE_UNIQUE_CALLABLE(KernelGenerator); \ + nvbench::benchmark_base &NVBENCH_UNIQUE_IDENTIFIER(obj_##KernelGenerator) = \ + nvbench::benchmark_manager::get() \ + .add(std::make_unique>()) \ .set_name(#KernelGenerator) -#define NVBENCH_BENCH_TYPES(KernelGenerator, TypeAxes) \ - NVBENCH_DEFINE_UNIQUE_CALLABLE_TEMPLATE(KernelGenerator); \ - nvbench::benchmark_base &NVBENCH_UNIQUE_IDENTIFIER(obj_##KernelGenerator) = \ - nvbench::benchmark_manager::get() \ - .add(std::make_unique< \ - nvbench::benchmark>()) \ +#define NVBENCH_BENCH_TYPES(KernelGenerator, TypeAxes) \ + NVBENCH_DEFINE_UNIQUE_CALLABLE_TEMPLATE(KernelGenerator); \ + nvbench::benchmark_base &NVBENCH_UNIQUE_IDENTIFIER(obj_##KernelGenerator) = \ + nvbench::benchmark_manager::get() \ + .add(std::make_unique< \ + nvbench::benchmark>()) \ .set_name(#KernelGenerator) diff --git a/nvbench/csv_printer.cu b/nvbench/csv_printer.cu index 6acb535..87fb88a 100644 --- a/nvbench/csv_printer.cu +++ b/nvbench/csv_printer.cu @@ -66,10 +66,8 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches) { std::optional device = cur_state.get_device(); - std::string device_id = device ? fmt::to_string(device->get_id()) - : std::string{}; - std::string device_name = device ? std::string{device->get_name()} - : std::string{}; + std::string device_id = device ? fmt::to_string(device->get_id()) : std::string{}; + std::string device_name = device ? std::string{device->get_name()} : std::string{}; table.add_cell(row, "_bench_name", "Benchmark", bench_name); table.add_cell(row, "_device_id", "Device", std::move(device_id)); @@ -88,15 +86,11 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches) name + "_axis_pow2_pretty", name + " (pow2)", fmt::format("2^{}", exponent)); - table.add_cell(row, - name + "_axis_plain", - fmt::format("{}", name), - fmt::to_string(value)); + table.add_cell(row, name + "_axis_plain", fmt::format("{}", name), fmt::to_string(value)); } else { - std::string value = std::visit(format_visitor, - axis_values.get_value(name)); + std::string value = std::visit(format_visitor, axis_values.get_value(name)); table.add_cell(row, name + "_axis", name, std::move(value)); } } @@ -117,14 +111,10 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches) continue; } const std::string &tag = summ.get_tag(); - const std::string &header = summ.has_value("name") - ? summ.get_string("name") - : tag; + const std::string &header = summ.has_value("name") ? summ.get_string("name") : tag; - const std::string hint = summ.has_value("hint") - ? summ.get_string("hint") - : std::string{}; - std::string value = std::visit(format_visitor, summ.get_value("value")); + const std::string hint = summ.has_value("hint") ? summ.get_string("hint") : std::string{}; + std::string value = std::visit(format_visitor, summ.get_value("value")); if (hint == "duration") { table.add_cell(row, tag, header + " (sec)", std::move(value)); @@ -182,10 +172,7 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches) std::size_t remaining = table.m_columns.size(); for (const auto &col : table.m_columns) { - fmt::format_to(buffer, - "{}{}", - col.rows[i], - (--remaining == 0) ? "" : ","); + fmt::format_to(buffer, "{}{}", col.rows[i], (--remaining == 0) ? "" : ","); } fmt::format_to(buffer, "\n"); } diff --git a/nvbench/cuda_call.cu b/nvbench/cuda_call.cu index 6cb304b..662c759 100644 --- a/nvbench/cuda_call.cu +++ b/nvbench/cuda_call.cu @@ -16,8 +16,8 @@ * limitations under the License. */ -#include #include +#include #include @@ -66,11 +66,7 @@ void throw_error(const std::string &filename, command)); } #else -void throw_error(const std::string &, - std::size_t, - const std::string &, - CUresult) -{} +void throw_error(const std::string &, std::size_t, const std::string &, CUresult) {} #endif void exit_error(const std::string &filename, diff --git a/nvbench/cuda_call.cuh b/nvbench/cuda_call.cuh index f1d6c45..ca8e765 100644 --- a/nvbench/cuda_call.cuh +++ b/nvbench/cuda_call.cuh @@ -18,52 +18,43 @@ #pragma once -#include #include +#include #include /// Throws a std::runtime_error if `call` doesn't return `cudaSuccess`. -#define NVBENCH_CUDA_CALL(call) \ - do \ - { \ - const cudaError_t nvbench_cuda_call_error = call; \ - if (nvbench_cuda_call_error != cudaSuccess) \ - { \ - nvbench::cuda_call::throw_error(__FILE__, \ - __LINE__, \ - #call, \ - nvbench_cuda_call_error); \ - } \ +#define NVBENCH_CUDA_CALL(call) \ + do \ + { \ + const cudaError_t nvbench_cuda_call_error = call; \ + if (nvbench_cuda_call_error != cudaSuccess) \ + { \ + nvbench::cuda_call::throw_error(__FILE__, __LINE__, #call, nvbench_cuda_call_error); \ + } \ } while (false) /// Throws a std::runtime_error if `call` doesn't return `CUDA_SUCCESS`. -#define NVBENCH_DRIVER_API_CALL(call) \ - do \ - { \ - const CUresult nvbench_cuda_call_error = call; \ - if (nvbench_cuda_call_error != CUDA_SUCCESS) \ - { \ - nvbench::cuda_call::throw_error(__FILE__, \ - __LINE__, \ - #call, \ - nvbench_cuda_call_error); \ - } \ +#define NVBENCH_DRIVER_API_CALL(call) \ + do \ + { \ + const CUresult nvbench_cuda_call_error = call; \ + if (nvbench_cuda_call_error != CUDA_SUCCESS) \ + { \ + nvbench::cuda_call::throw_error(__FILE__, __LINE__, #call, nvbench_cuda_call_error); \ + } \ } while (false) /// Terminates process with failure status if `call` doesn't return /// `cudaSuccess`. -#define NVBENCH_CUDA_CALL_NOEXCEPT(call) \ - do \ - { \ - const cudaError_t nvbench_cuda_call_error = call; \ - if (nvbench_cuda_call_error != cudaSuccess) \ - { \ - nvbench::cuda_call::exit_error(__FILE__, \ - __LINE__, \ - #call, \ - nvbench_cuda_call_error); \ - } \ +#define NVBENCH_CUDA_CALL_NOEXCEPT(call) \ + do \ + { \ + const cudaError_t nvbench_cuda_call_error = call; \ + if (nvbench_cuda_call_error != cudaSuccess) \ + { \ + nvbench::cuda_call::exit_error(__FILE__, __LINE__, #call, nvbench_cuda_call_error); \ + } \ } while (false) namespace nvbench::cuda_call diff --git a/nvbench/cuda_stream.cuh b/nvbench/cuda_stream.cuh index 6674c27..2c7536c 100644 --- a/nvbench/cuda_stream.cuh +++ b/nvbench/cuda_stream.cuh @@ -66,10 +66,10 @@ struct cuda_stream ~cuda_stream() = default; // move-only - cuda_stream(const cuda_stream &) = delete; + cuda_stream(const cuda_stream &) = delete; cuda_stream &operator=(const cuda_stream &) = delete; cuda_stream(cuda_stream &&) = default; - cuda_stream &operator=(cuda_stream &&) = default; + cuda_stream &operator=(cuda_stream &&) = default; /** * @return The `cudaStream_t` managed by this `cuda_stream`. diff --git a/nvbench/cuda_timer.cuh b/nvbench/cuda_timer.cuh index 0e022ce..e1c6e66 100644 --- a/nvbench/cuda_timer.cuh +++ b/nvbench/cuda_timer.cuh @@ -42,10 +42,10 @@ struct cuda_timer } // move-only - cuda_timer(const cuda_timer &) = delete; - cuda_timer(cuda_timer &&) = default; + cuda_timer(const cuda_timer &) = delete; + cuda_timer(cuda_timer &&) = default; cuda_timer &operator=(const cuda_timer &) = delete; - cuda_timer &operator=(cuda_timer &&) = default; + cuda_timer &operator=(cuda_timer &&) = default; __forceinline__ void start(cudaStream_t stream) { diff --git a/nvbench/cupti_profiler.cuh b/nvbench/cupti_profiler.cuh index 6e0e255..214706a 100644 --- a/nvbench/cupti_profiler.cuh +++ b/nvbench/cupti_profiler.cuh @@ -21,14 +21,13 @@ #include #include +#include #include #include -#include namespace nvbench::detail { - #ifdef NVBENCH_HAS_CUPTI /** * Pass required metrics in the constructor and organize your code as follows @@ -62,7 +61,7 @@ namespace nvbench::detail */ class cupti_profiler { - bool m_available {}; + bool m_available{}; std::string m_chip_name; // Counter data @@ -87,11 +86,10 @@ public: cupti_profiler(cupti_profiler &&) noexcept; cupti_profiler &operator=(cupti_profiler &&) noexcept; - cupti_profiler(const cupti_profiler &) = delete; + cupti_profiler(const cupti_profiler &) = delete; cupti_profiler &operator=(const cupti_profiler &) = delete; - cupti_profiler(nvbench::device_info device, - std::vector &&metric_names); + cupti_profiler(nvbench::device_info device, std::vector &&metric_names); ~cupti_profiler(); [[nodiscard]] bool is_initialized() const; @@ -125,5 +123,4 @@ private: }; #endif - } // namespace nvbench::detail diff --git a/nvbench/cupti_profiler.cxx b/nvbench/cupti_profiler.cxx index 6dcd81d..a3c076b 100644 --- a/nvbench/cupti_profiler.cxx +++ b/nvbench/cupti_profiler.cxx @@ -59,8 +59,7 @@ void nvpw_call(const NVPA_Status status) } // namespace -cupti_profiler::cupti_profiler(nvbench::device_info device, - std::vector &&metric_names) +cupti_profiler::cupti_profiler(nvbench::device_info device, std::vector &&metric_names) : m_metric_names(metric_names) , m_device(device) { @@ -154,12 +153,10 @@ class eval_request NVPW_MetricsEvaluator *evaluator_ptr; public: - eval_request(NVPW_MetricsEvaluator *evaluator_ptr, - const std::string &metric_name) + eval_request(NVPW_MetricsEvaluator *evaluator_ptr, const std::string &metric_name) : evaluator_ptr(evaluator_ptr) { - NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params params = - {}; + NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params params = {}; params.structSize = NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE; @@ -168,8 +165,7 @@ public: params.pMetricEvalRequest = &request; params.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE; - nvpw_call( - NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(¶ms)); + nvpw_call(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(¶ms)); } [[nodiscard]] std::vector get_raw_dependencies() @@ -178,10 +174,9 @@ public: NVPW_MetricsEvaluator_GetMetricRawDependencies_Params params{}; - params.structSize = - NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE; - params.pMetricsEvaluator = evaluator_ptr; - params.pMetricEvalRequests = &request; + params.structSize = NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE; + params.pMetricsEvaluator = evaluator_ptr; + params.pMetricEvalRequests = &request; params.numMetricEvalRequests = 1; params.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE; params.metricEvalRequestStrideSize = sizeof(NVPW_MetricEvalRequest); @@ -211,26 +206,23 @@ public: const std::uint8_t *counter_data_image = nullptr, const std::size_t counter_data_image_size = 0) { - NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params - scratch_buffer_param{}; + NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params scratch_buffer_param{}; scratch_buffer_param.structSize = NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE; scratch_buffer_param.pChipName = chip_name.c_str(); scratch_buffer_param.pCounterAvailabilityImage = counter_availability_image; - nvpw_call(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize( - &scratch_buffer_param)); + nvpw_call(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(&scratch_buffer_param)); scratch_buffer.resize(scratch_buffer_param.scratchBufferSize); NVPW_CUDA_MetricsEvaluator_Initialize_Params evaluator_params{}; - evaluator_params.structSize = - NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE; - evaluator_params.scratchBufferSize = scratch_buffer.size(); - evaluator_params.pScratchBuffer = scratch_buffer.data(); - evaluator_params.pChipName = chip_name.c_str(); + evaluator_params.structSize = NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE; + evaluator_params.scratchBufferSize = scratch_buffer.size(); + evaluator_params.pScratchBuffer = scratch_buffer.data(); + evaluator_params.pChipName = chip_name.c_str(); evaluator_params.pCounterAvailabilityImage = counter_availability_image; evaluator_params.pCounterDataImage = counter_data_image; evaluator_params.counterDataImageSize = counter_data_image_size; @@ -247,7 +239,7 @@ public: { NVPW_MetricsEvaluator_Destroy_Params params{}; - params.structSize = NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE; + params.structSize = NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE; params.pMetricsEvaluator = evaluator_ptr; nvpw_call(NVPW_MetricsEvaluator_Destroy(¶ms)); @@ -259,10 +251,7 @@ public: return {evaluator_ptr, metric_name}; } - [[nodiscard]] operator NVPW_MetricsEvaluator *() const - { - return evaluator_ptr; - } + [[nodiscard]] operator NVPW_MetricsEvaluator *() const { return evaluator_ptr; } }; } // namespace @@ -270,10 +259,10 @@ public: namespace { -[[nodiscard]] std::vector get_raw_metric_requests( - const std::string &chip_name, - const std::vector &metric_names, - const std::uint8_t *counter_availability_image = nullptr) +[[nodiscard]] std::vector +get_raw_metric_requests(const std::string &chip_name, + const std::vector &metric_names, + const std::uint8_t *counter_availability_image = nullptr) { metric_evaluator evaluator(chip_name, counter_availability_image); @@ -282,8 +271,7 @@ namespace for (auto &metric_name : metric_names) { - for (auto &raw_dependency : - evaluator.create_request(metric_name).get_raw_dependencies()) + for (auto &raw_dependency : evaluator.create_request(metric_name).get_raw_dependencies()) { raw_metric_names.push_back(raw_dependency); } @@ -295,10 +283,10 @@ namespace for (auto &raw_name : raw_metric_names) { NVPA_RawMetricRequest metricRequest{}; - metricRequest.structSize = NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE; - metricRequest.pMetricName = raw_name; - metricRequest.isolated = true; - metricRequest.keepInstances = true; + metricRequest.structSize = NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE; + metricRequest.pMetricName = raw_name; + metricRequest.isolated = true; + metricRequest.keepInstances = true; raw_requests.push_back(metricRequest); } @@ -309,12 +297,11 @@ class metrics_config { bool initialized{}; - void create(const std::string &chip_name, - const std::uint8_t *availability_image) + void create(const std::string &chip_name, const std::uint8_t *availability_image) { NVPW_CUDA_RawMetricsConfig_Create_V2_Params params{}; - params.structSize = NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE; + params.structSize = NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE; params.activityKind = NVPA_ACTIVITY_KIND_PROFILER; params.pChipName = chip_name.c_str(); params.pCounterAvailabilityImage = availability_image; @@ -329,9 +316,8 @@ class metrics_config { NVPW_RawMetricsConfig_SetCounterAvailability_Params params{}; - params.structSize = - NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE; - params.pRawMetricsConfig = raw_metrics_config; + params.structSize = NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE; + params.pRawMetricsConfig = raw_metrics_config; params.pCounterAvailabilityImage = availability_image; nvpw_call(NVPW_RawMetricsConfig_SetCounterAvailability(¶ms)); @@ -341,7 +327,7 @@ class metrics_config { NVPW_RawMetricsConfig_BeginPassGroup_Params params{}; - params.structSize = NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; nvpw_call(NVPW_RawMetricsConfig_BeginPassGroup(¶ms)); @@ -351,7 +337,7 @@ class metrics_config { NVPW_RawMetricsConfig_AddMetrics_Params params{}; - params.structSize = NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; params.pRawMetricRequests = raw_metric_requests.data(); params.numMetricRequests = raw_metric_requests.size(); @@ -363,7 +349,7 @@ class metrics_config { NVPW_RawMetricsConfig_EndPassGroup_Params params{}; - params.structSize = NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; nvpw_call(NVPW_RawMetricsConfig_EndPassGroup(¶ms)); @@ -373,8 +359,7 @@ class metrics_config { NVPW_RawMetricsConfig_GenerateConfigImage_Params params{}; - params.structSize = - NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; nvpw_call(NVPW_RawMetricsConfig_GenerateConfigImage(¶ms)); @@ -398,7 +383,7 @@ public: { NVPW_RawMetricsConfig_GetConfigImage_Params params{}; - params.structSize = NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; params.bytesAllocated = 0; params.pBuffer = nullptr; @@ -419,7 +404,7 @@ public: { NVPW_RawMetricsConfig_Destroy_Params params{}; - params.structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; NVPW_RawMetricsConfig_Destroy(¶ms); @@ -433,13 +418,12 @@ public: void cupti_profiler::initialize_config_image() { - m_config_image = - metrics_config(m_chip_name, - get_raw_metric_requests(m_chip_name, - m_metric_names, - m_availability_image.data()), - m_availability_image.data()) - .get_config_image(); + m_config_image = metrics_config(m_chip_name, + get_raw_metric_requests(m_chip_name, + m_metric_names, + m_availability_image.data()), + m_availability_image.data()) + .get_config_image(); } namespace @@ -450,12 +434,11 @@ class counter_data_builder bool initialized{}; public: - counter_data_builder(const std::string &chip_name, - const std::uint8_t *pCounterAvailabilityImage) + counter_data_builder(const std::string &chip_name, const std::uint8_t *pCounterAvailabilityImage) { NVPW_CUDA_CounterDataBuilder_Create_Params params{}; - params.structSize = NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE; + params.structSize = NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE; params.pChipName = chip_name.c_str(); params.pCounterAvailabilityImage = pCounterAvailabilityImage; @@ -471,7 +454,7 @@ public: { NVPW_CounterDataBuilder_Destroy_Params params{}; - params.structSize = NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE; + params.structSize = NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE; params.pCounterDataBuilder = builder; NVPW_CounterDataBuilder_Destroy(¶ms); @@ -488,16 +471,14 @@ void cupti_profiler::initialize_counter_data_prefix_image() const std::uint8_t *counter_availability_image = nullptr; std::vector raw_metric_requests = - get_raw_metric_requests(m_chip_name, - m_metric_names, - counter_availability_image); + get_raw_metric_requests(m_chip_name, m_metric_names, counter_availability_image); counter_data_builder data_builder(m_chip_name, counter_availability_image); { NVPW_CounterDataBuilder_AddMetrics_Params params{}; - params.structSize = NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE; + params.structSize = NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE; params.pCounterDataBuilder = data_builder.builder; params.pRawMetricRequests = raw_metric_requests.data(); params.numMetricRequests = raw_metric_requests.size(); @@ -508,8 +489,7 @@ void cupti_profiler::initialize_counter_data_prefix_image() { NVPW_CounterDataBuilder_GetCounterDataPrefix_Params params{}; - params.structSize = - NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE; + params.structSize = NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE; params.pCounterDataBuilder = data_builder.builder; params.bytesAllocated = 0; params.pBuffer = nullptr; @@ -532,11 +512,9 @@ get_counter_data_image_size(CUpti_Profiler_CounterDataImageOptions *options) { CUpti_Profiler_CounterDataImage_CalculateSize_Params params{}; - params.structSize = - CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE; - params.pOptions = options; - params.sizeofCounterDataImageOptions = - CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; + params.structSize = CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE; + params.pOptions = options; + params.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; cupti_call(cuptiProfilerCounterDataImageCalculateSize(¶ms)); return params.counterDataImageSize; @@ -559,12 +537,10 @@ void cupti_profiler::initialize_counter_data_image() { CUpti_Profiler_CounterDataImage_Initialize_Params params{}; - params.structSize = - CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE; - params.sizeofCounterDataImageOptions = - CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; - params.pOptions = &counter_data_image_options; - params.counterDataImageSize = m_data_image.size(); + params.structSize = CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE; + params.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; + params.pOptions = &counter_data_image_options; + params.counterDataImageSize = m_data_image.size(); params.pCounterDataImage = &m_data_image[0]; cupti_call(cuptiProfilerCounterDataImageInitialize(¶ms)); @@ -578,8 +554,7 @@ void cupti_profiler::initialize_counter_data_image() params.counterDataImageSize = m_data_image.size(); params.pCounterDataImage = &m_data_image[0]; - cupti_call( - cuptiProfilerCounterDataImageCalculateScratchBufferSize(¶ms)); + cupti_call(cuptiProfilerCounterDataImageCalculateScratchBufferSize(¶ms)); m_data_scratch_buffer.resize(params.counterDataScratchBufferSize); } @@ -587,8 +562,7 @@ void cupti_profiler::initialize_counter_data_image() { CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params params{}; - params.structSize = - CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE; + params.structSize = CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE; params.counterDataImageSize = m_data_image.size(); params.pCounterDataImage = &m_data_image[0]; params.counterDataScratchBufferSize = m_data_scratch_buffer.size(); @@ -608,17 +582,14 @@ cupti_profiler::~cupti_profiler() } } -bool cupti_profiler::is_initialized() const -{ - return m_available; -} +bool cupti_profiler::is_initialized() const { return m_available; } void cupti_profiler::prepare_user_loop() { { CUpti_Profiler_BeginSession_Params params{}; - params.structSize = CUpti_Profiler_BeginSession_Params_STRUCT_SIZE; + params.structSize = CUpti_Profiler_BeginSession_Params_STRUCT_SIZE; params.ctx = nullptr; params.counterDataImageSize = m_data_image.size(); params.pCounterDataImage = &m_data_image[0]; @@ -735,9 +706,7 @@ std::vector cupti_profiler::get_counter_values() if (params.numRanges != 1) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Something's gone wrong, one range is expected"); + NVBENCH_THROW(std::runtime_error, "{}", "Something's gone wrong, one range is expected"); } } @@ -752,8 +721,7 @@ std::vector cupti_profiler::get_counter_values() { NVPW_MetricsEvaluator_SetDeviceAttributes_Params params{}; - params.structSize = - NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE; + params.structSize = NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE; params.pMetricsEvaluator = evaluator; params.pCounterDataImage = m_data_image.data(); params.counterDataImageSize = m_data_image.size(); @@ -764,11 +732,10 @@ std::vector cupti_profiler::get_counter_values() { NVPW_MetricsEvaluator_EvaluateToGpuValues_Params params{}; - params.structSize = - NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE; - params.pMetricsEvaluator = evaluator; - params.pMetricEvalRequests = &request.request; - params.numMetricEvalRequests = 1; + params.structSize = NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE; + params.pMetricsEvaluator = evaluator; + params.pMetricEvalRequests = &request.request; + params.numMetricEvalRequests = 1; params.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE; params.metricEvalRequestStrideSize = sizeof(NVPW_MetricEvalRequest); params.pCounterDataImage = m_data_image.data(); diff --git a/nvbench/detail/device_scope.cuh b/nvbench/detail/device_scope.cuh index de3a55a..c924bee 100644 --- a/nvbench/detail/device_scope.cuh +++ b/nvbench/detail/device_scope.cuh @@ -39,9 +39,9 @@ struct [[maybe_unused]] device_scope ~device_scope() { NVBENCH_CUDA_CALL(cudaSetDevice(m_old_device_id)); } // move-only - device_scope(device_scope &&) = default; - device_scope &operator=(device_scope &&) = default; - device_scope(const device_scope &) = delete; + device_scope(device_scope &&) = default; + device_scope &operator=(device_scope &&) = default; + device_scope(const device_scope &) = delete; device_scope &operator=(const device_scope &) = delete; private: diff --git a/nvbench/detail/kernel_launcher_timer_wrapper.cuh b/nvbench/detail/kernel_launcher_timer_wrapper.cuh index 39a999e..1efdf6e 100644 --- a/nvbench/detail/kernel_launcher_timer_wrapper.cuh +++ b/nvbench/detail/kernel_launcher_timer_wrapper.cuh @@ -33,7 +33,7 @@ namespace detail template struct kernel_launch_timer_wrapper { - explicit kernel_launch_timer_wrapper(KernelLauncher &launcher) + explicit kernel_launch_timer_wrapper(KernelLauncher &launcher) : m_kernel_launcher{launcher} {} diff --git a/nvbench/detail/l2flush.cuh b/nvbench/detail/l2flush.cuh index 0e33f7e..aefbfef 100644 --- a/nvbench/detail/l2flush.cuh +++ b/nvbench/detail/l2flush.cuh @@ -31,13 +31,12 @@ struct l2flush { int dev_id{}; NVBENCH_CUDA_CALL(cudaGetDevice(&dev_id)); - NVBENCH_CUDA_CALL( - cudaDeviceGetAttribute(&m_l2_size, cudaDevAttrL2CacheSize, dev_id)); + NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&m_l2_size, cudaDevAttrL2CacheSize, dev_id)); if (m_l2_size > 0) { - void* buffer = m_l2_buffer; + void *buffer = m_l2_buffer; NVBENCH_CUDA_CALL(cudaMalloc(&buffer, m_l2_size)); - m_l2_buffer = reinterpret_cast(buffer); + m_l2_buffer = reinterpret_cast(buffer); } } diff --git a/nvbench/detail/measure_cold.cu b/nvbench/detail/measure_cold.cu index 380d2cd..69ceb7e 100644 --- a/nvbench/detail/measure_cold.cu +++ b/nvbench/detail/measure_cold.cu @@ -54,15 +54,11 @@ void measure_cold_base::check() const auto device = m_state.get_device(); if (!device) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Device required for `cold` measurement."); + NVBENCH_THROW(std::runtime_error, "{}", "Device required for `cold` measurement."); } if (!device->is_active()) { // This means something went wrong higher up. Throw an error. - NVBENCH_THROW(std::runtime_error, - "{}", - "Internal error: Current device is not active."); + NVBENCH_THROW(std::runtime_error, "{}", "Internal error: Current device is not active."); } } @@ -92,13 +88,11 @@ void measure_cold_base::record_measurements() ++m_total_samples; // Compute convergence statistics using CUDA timings: - const auto mean_cuda_time = m_total_cuda_time / - static_cast(m_total_samples); - const auto cuda_stdev = - nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(), - m_cuda_times.cend(), - mean_cuda_time); - auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; + const auto mean_cuda_time = m_total_cuda_time / static_cast(m_total_samples); + const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(), + m_cuda_times.cend(), + mean_cuda_time); + auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; if (std::isfinite(cuda_rel_stdev)) { m_noise_tracker.push_back(cuda_rel_stdev); @@ -132,10 +126,10 @@ bool measure_cold_base::is_finished() { // Use the current noise as the stdev reference. const auto current_noise = m_noise_tracker.back(); - const auto noise_stdev = nvbench::detail::statistics::standard_deviation( - m_noise_tracker.cbegin(), - m_noise_tracker.cend(), - current_noise); + const auto noise_stdev = + nvbench::detail::statistics::standard_deviation(m_noise_tracker.cbegin(), + m_noise_tracker.cend(), + current_noise); const auto noise_rel_stdev = noise_stdev / current_noise; // If the rel stdev of the last N cuda noise measurements is less than @@ -162,13 +156,11 @@ bool measure_cold_base::is_finished() void measure_cold_base::run_trials_epilogue() { // Only need to compute this at the end, not per iteration. - const auto cpu_mean = m_total_cuda_time / - static_cast(m_total_samples); - const auto cpu_stdev = - nvbench::detail::statistics::standard_deviation(m_cpu_times.cbegin(), - m_cpu_times.cend(), - cpu_mean); - m_cpu_noise = cpu_stdev / cpu_mean; + const auto cpu_mean = m_total_cuda_time / static_cast(m_total_samples); + const auto cpu_stdev = nvbench::detail::statistics::standard_deviation(m_cpu_times.cbegin(), + m_cpu_times.cend(), + cpu_mean); + m_cpu_noise = cpu_stdev / cpu_mean; m_walltime_timer.stop(); } @@ -199,8 +191,7 @@ void measure_cold_base::generate_summaries() auto &summ = m_state.add_summary("nv/cold/time/cpu/stdev/relative"); summ.set_string("name", "Noise"); summ.set_string("hint", "percentage"); - summ.set_string("description", - "Relative standard deviation of isolated CPU times"); + summ.set_string("description", "Relative standard deviation of isolated CPU times"); summ.set_float64("value", m_cpu_noise); } @@ -219,12 +210,10 @@ void measure_cold_base::generate_summaries() auto &summ = m_state.add_summary("nv/cold/time/gpu/stdev/relative"); summ.set_string("name", "Noise"); summ.set_string("hint", "percentage"); - summ.set_string("description", - "Relative standard deviation of isolated GPU times"); + summ.set_string("description", "Relative standard deviation of isolated GPU times"); summ.set_float64("value", - m_noise_tracker.empty() - ? std::numeric_limits::infinity() - : m_noise_tracker.back()); + m_noise_tracker.empty() ? std::numeric_limits::infinity() + : m_noise_tracker.back()); } if (const auto items = m_state.get_element_count(); items != 0) @@ -232,8 +221,7 @@ void measure_cold_base::generate_summaries() auto &summ = m_state.add_summary("nv/cold/bw/item_rate"); summ.set_string("name", "Elem/s"); summ.set_string("hint", "item_rate"); - summ.set_string("description", - "Number of input elements processed per second"); + summ.set_string("description", "Number of input elements processed per second"); summ.set_float64("value", static_cast(items) / avg_cuda_time); } @@ -251,8 +239,8 @@ void measure_cold_base::generate_summaries() } { - const auto peak_gmem_bw = static_cast( - m_state.get_device()->get_global_memory_bus_bandwidth()); + const auto peak_gmem_bw = + static_cast(m_state.get_device()->get_global_memory_bus_bandwidth()); auto &summ = m_state.add_summary("nv/cold/bw/global/utilization"); summ.set_string("name", "BWUtil"); @@ -274,8 +262,7 @@ void measure_cold_base::generate_summaries() } // Log if a printer exists: - if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); @@ -324,10 +311,7 @@ void measure_cold_base::generate_summaries() m_walltime_timer.get_duration(), m_total_samples)); - printer.process_bulk_data(m_state, - "nv/cold/sample_times", - "sample_times", - m_cuda_times); + printer.process_bulk_data(m_state, "nv/cold/sample_times", "sample_times", m_cuda_times); } } diff --git a/nvbench/detail/measure_cold.cuh b/nvbench/detail/measure_cold.cuh index 0cab36a..a5c2604 100644 --- a/nvbench/detail/measure_cold.cuh +++ b/nvbench/detail/measure_cold.cuh @@ -49,10 +49,10 @@ namespace detail struct measure_cold_base { explicit measure_cold_base(nvbench::state &exec_state); - measure_cold_base(const measure_cold_base &) = delete; - measure_cold_base(measure_cold_base &&) = delete; + measure_cold_base(const measure_cold_base &) = delete; + measure_cold_base(measure_cold_base &&) = delete; measure_cold_base &operator=(const measure_cold_base &) = delete; - measure_cold_base &operator=(measure_cold_base &&) = delete; + measure_cold_base &operator=(measure_cold_base &&) = delete; protected: template @@ -68,10 +68,7 @@ protected: void check_skip_time(nvbench::float64_t warmup_time); - __forceinline__ void flush_device_l2() - { - m_l2flush.flush(m_launch.get_stream()); - } + __forceinline__ void flush_device_l2() { m_l2flush.flush(m_launch.get_stream()); } __forceinline__ void sync_stream() const { diff --git a/nvbench/detail/measure_cupti.cu b/nvbench/detail/measure_cupti.cu index 9e8de6c..e583cd5 100644 --- a/nvbench/detail/measure_cupti.cu +++ b/nvbench/detail/measure_cupti.cu @@ -50,8 +50,7 @@ struct metric_traits; template <> struct metric_traits { - static constexpr const char *metric_name = - "dram__throughput.avg.pct_of_peak_sustained_elapsed"; + static constexpr const char *metric_name = "dram__throughput.avg.pct_of_peak_sustained_elapsed"; static constexpr const char *name = "HBWPeak"; static constexpr const char *hint = "percentage"; @@ -119,10 +118,7 @@ struct metric_traits static constexpr const char *description = "Hit rate at L1 cache."; static constexpr double divider = 100.0; - static bool is_collected(nvbench::state &m_state) - { - return m_state.is_l1_hit_rate_collected(); - }; + static bool is_collected(nvbench::state &m_state) { return m_state.is_l1_hit_rate_collected(); }; }; template <> @@ -134,10 +130,7 @@ struct metric_traits static constexpr const char *description = "Hit rate at L2 cache."; static constexpr double divider = 100.0; - static bool is_collected(nvbench::state &m_state) - { - return m_state.is_l2_hit_rate_collected(); - }; + static bool is_collected(nvbench::state &m_state) { return m_state.is_l2_hit_rate_collected(); }; }; template @@ -153,8 +146,7 @@ void add_metrics_impl(nvbench::state &state, std::vector &metrics) } template <> -void add_metrics_impl(nvbench::state &, - std::vector &) +void add_metrics_impl(nvbench::state &, std::vector &) {} std::vector add_metrics(nvbench::state &state) @@ -179,13 +171,11 @@ try // clang-format on catch (const std::exception &ex) { - if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); - printer_opt_ref) + if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); printer_opt_ref) { auto &printer = printer_opt_ref.value().get(); printer.log(nvbench::log_level::warn, - fmt::format("CUPTI failed to construct profiler: {}", - ex.what())); + fmt::format("CUPTI failed to construct profiler: {}", ex.what())); } } @@ -194,15 +184,11 @@ void measure_cupti_base::check() const auto device = m_state.get_device(); if (!device) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Device required for `cupti` measurement."); + NVBENCH_THROW(std::runtime_error, "{}", "Device required for `cupti` measurement."); } if (!device->is_active()) { // This means something went wrong higher up. Throw an error. - NVBENCH_THROW(std::runtime_error, - "{}", - "Internal error: Current device is not active."); + NVBENCH_THROW(std::runtime_error, "{}", "Internal error: Current device is not active."); } } @@ -210,16 +196,13 @@ namespace { template -void gen_summary(std::size_t result_id, - nvbench::state &m_state, - const std::vector &result) +void gen_summary(std::size_t result_id, nvbench::state &m_state, const std::vector &result) { using metric = metric_traits; if (metric::is_collected(m_state)) { - auto &summ = - m_state.add_summary(fmt::format("nv/cupti/{}", metric::metric_name)); + auto &summ = m_state.add_summary(fmt::format("nv/cupti/{}", metric::metric_name)); summ.set_string("name", metric::name); summ.set_string("hint", metric::hint); summ.set_string("description", metric::description); @@ -231,9 +214,7 @@ void gen_summary(std::size_t result_id, } template <> -void gen_summary(std::size_t, - nvbench::state &, - const std::vector &) +void gen_summary(std::size_t, nvbench::state &, const std::vector &) {} void gen_summaries(nvbench::state &state, const std::vector &result) @@ -266,8 +247,7 @@ try } // Log if a printer exists: - if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); printer.log(nvbench::log_level::pass, @@ -278,13 +258,11 @@ try } catch (const std::exception &ex) { - if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); - printer_opt_ref) + if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); printer_opt_ref) { auto &printer = printer_opt_ref.value().get(); printer.log(nvbench::log_level::warn, - fmt::format("CUPTI failed to generate the summary: {}", - ex.what())); + fmt::format("CUPTI failed to generate the summary: {}", ex.what())); } } diff --git a/nvbench/detail/measure_cupti.cuh b/nvbench/detail/measure_cupti.cuh index 736c375..ec7b212 100644 --- a/nvbench/detail/measure_cupti.cuh +++ b/nvbench/detail/measure_cupti.cuh @@ -50,10 +50,10 @@ namespace detail struct measure_cupti_base { explicit measure_cupti_base(nvbench::state &exec_state); - measure_cupti_base(const measure_cupti_base &) = delete; - measure_cupti_base(measure_cupti_base &&) = delete; + measure_cupti_base(const measure_cupti_base &) = delete; + measure_cupti_base(measure_cupti_base &&) = delete; measure_cupti_base &operator=(const measure_cupti_base &) = delete; - measure_cupti_base &operator=(measure_cupti_base &&) = delete; + measure_cupti_base &operator=(measure_cupti_base &&) = delete; protected: struct kernel_launch_timer; @@ -61,10 +61,7 @@ protected: void check(); void generate_summaries(); - __forceinline__ void flush_device_l2() - { - m_l2flush.flush(m_launch.get_stream()); - } + __forceinline__ void flush_device_l2() { m_l2flush.flush(m_launch.get_stream()); } __forceinline__ void sync_stream() const { diff --git a/nvbench/detail/measure_hot.cu b/nvbench/detail/measure_hot.cu index 25e2119..9497122 100644 --- a/nvbench/detail/measure_hot.cu +++ b/nvbench/detail/measure_hot.cu @@ -47,9 +47,8 @@ measure_hot_base::measure_hot_base(state &exec_state) // to match the cold result if available. try { - nvbench::int64_t cold_samples = - m_state.get_summary("nv/cold/sample_size").get_int64("value"); - m_min_samples = std::max(m_min_samples, cold_samples); + nvbench::int64_t cold_samples = m_state.get_summary("nv/cold/sample_size").get_int64("value"); + m_min_samples = std::max(m_min_samples, cold_samples); // If the cold measurement ran successfully, disable skip_time. It'd just // be annoying to skip now. @@ -72,15 +71,11 @@ void measure_hot_base::check() const auto device = m_state.get_device(); if (!device) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Device required for `hot` measurement."); + NVBENCH_THROW(std::runtime_error, "{}", "Device required for `hot` measurement."); } if (!device->is_active()) { // This means something went wrong higher up. Throw an error. - NVBENCH_THROW(std::runtime_error, - "{}", - "Internal error: Current device is not active."); + NVBENCH_THROW(std::runtime_error, "{}", "Internal error: Current device is not active."); } } @@ -116,8 +111,7 @@ void measure_hot_base::generate_summaries() } // Log if a printer exists: - if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); diff --git a/nvbench/detail/measure_hot.cuh b/nvbench/detail/measure_hot.cuh index 9c4c2ec..ae2b4f1 100644 --- a/nvbench/detail/measure_hot.cuh +++ b/nvbench/detail/measure_hot.cuh @@ -41,10 +41,10 @@ namespace detail struct measure_hot_base { explicit measure_hot_base(nvbench::state &exec_state); - measure_hot_base(const measure_hot_base &) = delete; - measure_hot_base(measure_hot_base &&) = delete; + measure_hot_base(const measure_hot_base &) = delete; + measure_hot_base(measure_hot_base &&) = delete; measure_hot_base &operator=(const measure_hot_base &) = delete; - measure_hot_base &operator=(measure_hot_base &&) = delete; + measure_hot_base &operator=(measure_hot_base &&) = delete; protected: void check(); @@ -131,7 +131,7 @@ private: // The .95 factor here pads the batch_size a bit to avoid needing a second // batch due to noise. const auto time_estimate = m_cuda_timer.get_duration() * 0.95; - auto batch_size = static_cast(m_min_time / time_estimate); + auto batch_size = static_cast(m_min_time / time_estimate); do { @@ -142,7 +142,7 @@ private: // Block stream until some work is queued. // Limit the number of kernel executions while blocked to prevent // deadlocks. See warnings on blocking_kernel. - const auto blocked_launches = std::min(batch_size, nvbench::int64_t{2}); + const auto blocked_launches = std::min(batch_size, nvbench::int64_t{2}); const auto unblocked_launches = batch_size - blocked_launches; this->block_stream(); @@ -189,7 +189,6 @@ private: break; // Stop iterating } - m_walltime_timer.stop(); if (m_walltime_timer.get_duration() > m_timeout) { diff --git a/nvbench/detail/ring_buffer.cuh b/nvbench/detail/ring_buffer.cuh index fa86200..645fa03 100644 --- a/nvbench/detail/ring_buffer.cuh +++ b/nvbench/detail/ring_buffer.cuh @@ -58,18 +58,12 @@ struct ring_buffer /** * The number of valid values in the ring buffer. Always <= capacity(). */ - [[nodiscard]] std::size_t size() const - { - return m_full ? m_buffer.size() : m_index; - } + [[nodiscard]] std::size_t size() const { return m_full ? m_buffer.size() : m_index; } /** * The maximum size of the ring buffer. */ - [[nodiscard]] std::size_t capacity() const - { - return m_buffer.size(); - } + [[nodiscard]] std::size_t capacity() const { return m_buffer.size(); } /** * @return True if the ring buffer is empty. diff --git a/nvbench/detail/state_exec.cuh b/nvbench/detail/state_exec.cuh index 9352a5f..bab2daf 100644 --- a/nvbench/detail/state_exec.cuh +++ b/nvbench/detail/state_exec.cuh @@ -53,7 +53,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) "`ExecTags` argument must be a member (or combination of " "members) from nvbench::exec_tag."); - constexpr auto measure_tags = tags & measure_mask; + constexpr auto measure_tags = tags & measure_mask; constexpr auto modifier_tags = tags & modifier_mask; // "run once" is handled by the cold measurement: @@ -81,8 +81,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) } else { - this->exec(cold | hot | tags, - std::forward(kernel_launcher)); + this->exec(cold | hot | tags, std::forward(kernel_launcher)); } return; } @@ -99,8 +98,8 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) constexpr bool use_blocking_kernel = !(tags & no_block); if constexpr (tags & timer) { - // Estimate bandwidth here - #ifdef NVBENCH_HAS_CUPTI +// Estimate bandwidth here +#ifdef NVBENCH_HAS_CUPTI if constexpr (!(modifier_tags & run_once)) { if (this->is_cupti_required()) @@ -110,7 +109,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) measure(); } } - #endif +#endif using measure_t = nvbench::detail::measure_cold; measure_t measure{*this, kernel_launcher}; @@ -121,8 +120,8 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) using wrapper_t = nvbench::detail::kernel_launch_timer_wrapper; wrapper_t wrapper{kernel_launcher}; - // Estimate bandwidth here - #ifdef NVBENCH_HAS_CUPTI +// Estimate bandwidth here +#ifdef NVBENCH_HAS_CUPTI if constexpr (!(modifier_tags & run_once)) { if (this->is_cupti_required()) @@ -132,10 +131,9 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) measure(); } } - #endif +#endif - using measure_t = - nvbench::detail::measure_cold; + using measure_t = nvbench::detail::measure_cold; measure_t measure(*this, wrapper); measure(); } @@ -143,12 +141,10 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) if constexpr (tags & hot) { - static_assert(!(tags & sync), - "Hot measurement doesn't support the `sync` exec_tag."); - static_assert(!(tags & timer), - "Hot measurement doesn't support the `timer` exec_tag."); + static_assert(!(tags & sync), "Hot measurement doesn't support the `sync` exec_tag."); + static_assert(!(tags & timer), "Hot measurement doesn't support the `timer` exec_tag."); constexpr bool use_blocking_kernel = !(tags & no_block); - using measure_t = nvbench::detail::measure_hot; + using measure_t = nvbench::detail::measure_hot; measure_t measure{*this, kernel_launcher}; measure(); } diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index 8c153bf..26a897a 100644 --- a/nvbench/detail/state_generator.cxx +++ b/nvbench/detail/state_generator.cxx @@ -40,9 +40,7 @@ void state_iterator::add_axis(const nvbench::axis_base &axis) this->add_axis(axis.get_name(), axis.get_type(), axis.get_size()); } -void state_iterator::add_axis(std::string axis, - nvbench::axis_type type, - std::size_t size) +void state_iterator::add_axis(std::string axis, nvbench::axis_type type, std::size_t size) { m_indices.push_back({std::move(axis), type, std::size_t{0}, size}); } @@ -74,10 +72,7 @@ state_iterator::get_current_indices() const return m_indices; } -[[nodiscard]] bool state_iterator::iter_valid() const -{ - return m_current < m_total; -} +[[nodiscard]] bool state_iterator::iter_valid() const { return m_current < m_total; } void state_iterator::next() { @@ -102,7 +97,7 @@ state_generator::state_generator(const benchmark_base &bench) void state_generator::build_axis_configs() { - const axes_metadata &axes = m_benchmark.get_axes(); + const axes_metadata &axes = m_benchmark.get_axes(); const std::vector> &axes_vec = axes.get_axes(); // Construct two state_generators: @@ -118,35 +113,29 @@ void state_generator::build_axis_configs() type_axes.reserve(axes_vec.size()); // Filter all axes by into type and non-type: - std::for_each(axes_vec.cbegin(), - axes_vec.cend(), - [&non_type_si, &type_axes](const auto &axis) { - if (axis->get_type() == nvbench::axis_type::type) - { - type_axes.push_back( - std::cref(static_cast(*axis))); - } - else - { - non_type_si.add_axis(*axis); - } - }); + std::for_each(axes_vec.cbegin(), axes_vec.cend(), [&non_type_si, &type_axes](const auto &axis) { + if (axis->get_type() == nvbench::axis_type::type) + { + type_axes.push_back(std::cref(static_cast(*axis))); + } + else + { + non_type_si.add_axis(*axis); + } + }); // Reverse sort type axes by index. This way the state_generator's cartesian // product of the type axes values will be enumerated in the same order as // nvbench::tl::cartesian_product. This is necessary to ensure // that the correct states are passed to the corresponding benchmark // instantiations. - std::sort(type_axes.begin(), - type_axes.end(), - [](const auto &axis_1, const auto &axis_2) { - return axis_1.get().get_axis_index() > - axis_2.get().get_axis_index(); - }); + std::sort(type_axes.begin(), type_axes.end(), [](const auto &axis_1, const auto &axis_2) { + return axis_1.get().get_axis_index() > axis_2.get().get_axis_index(); + }); - std::for_each(type_axes.cbegin(), - type_axes.cend(), - [&type_si](const auto &axis) { type_si.add_axis(axis); }); + std::for_each(type_axes.cbegin(), type_axes.cend(), [&type_si](const auto &axis) { + type_si.add_axis(axis); + }); } // type_axis_configs generation: @@ -157,8 +146,8 @@ void state_generator::build_axis_configs() // Build type_axis_configs for (type_si.init(); type_si.iter_valid(); type_si.next()) { - auto &[config, active_mask] = m_type_axis_configs.emplace_back( - std::make_pair(nvbench::named_values{}, true)); + auto &[config, active_mask] = + m_type_axis_configs.emplace_back(std::make_pair(nvbench::named_values{}, true)); // Reverse the indices so they're once again in the same order as // specified: @@ -173,8 +162,7 @@ void state_generator::build_axis_configs() active_mask = false; } - config.set_string(axis_info.axis, - axis.get_input_string(axis_info.index)); + config.set_string(axis_info.axis, axis.get_input_string(axis_info.index)); } } // type_si } // type_axis_config generation @@ -199,21 +187,18 @@ void state_generator::build_axis_configs() break; case axis_type::int64: - config.set_int64( - axis_info.axis, - axes.get_int64_axis(axis_info.axis).get_value(axis_info.index)); + config.set_int64(axis_info.axis, + axes.get_int64_axis(axis_info.axis).get_value(axis_info.index)); break; case axis_type::float64: - config.set_float64( - axis_info.axis, - axes.get_float64_axis(axis_info.axis).get_value(axis_info.index)); + config.set_float64(axis_info.axis, + axes.get_float64_axis(axis_info.axis).get_value(axis_info.index)); break; case axis_type::string: - config.set_string( - axis_info.axis, - axes.get_string_axis(axis_info.axis).get_value(axis_info.index)); + config.set_string(axis_info.axis, + axes.get_string_axis(axis_info.axis).get_value(axis_info.index)); break; } // switch (type) } // for (axis_info : current_indices) @@ -239,15 +224,12 @@ void state_generator::build_states() } } -void state_generator::add_states_for_device( - const std::optional &device) +void state_generator::add_states_for_device(const std::optional &device) { const auto num_type_configs = m_type_axis_configs.size(); - for (std::size_t type_config_index = 0; type_config_index < num_type_configs; - ++type_config_index) + for (std::size_t type_config_index = 0; type_config_index < num_type_configs; ++type_config_index) { - const auto &[type_config, - axis_mask] = m_type_axis_configs[type_config_index]; + const auto &[type_config, axis_mask] = m_type_axis_configs[type_config_index]; if (!axis_mask) { // Don't generate inner vector if the type config is masked out. @@ -261,10 +243,7 @@ void state_generator::add_states_for_device( config.append(non_type_config); // Create benchmark: - m_states.push_back(nvbench::state{m_benchmark, - std::move(config), - device, - type_config_index}); + m_states.push_back(nvbench::state{m_benchmark, std::move(config), device, type_config_index}); } } } diff --git a/nvbench/detail/statistics.cuh b/nvbench/detail/statistics.cuh index 957bca4..ddafc43 100644 --- a/nvbench/detail/statistics.cuh +++ b/nvbench/detail/statistics.cuh @@ -36,8 +36,7 @@ namespace nvbench::detail::statistics * * If the input has fewer than 5 sample, infinity is returned. */ -template ::value_type> +template ::value_type> ValueType standard_deviation(Iter first, Iter last, ValueType mean) { static_assert(std::is_floating_point_v); diff --git a/nvbench/detail/throw.cuh b/nvbench/detail/throw.cuh index ffbe5bb..e3bb9fd 100644 --- a/nvbench/detail/throw.cuh +++ b/nvbench/detail/throw.cuh @@ -21,17 +21,15 @@ #include #include -#define NVBENCH_THROW(exception_type, format_str, ...) \ - throw exception_type(fmt::format("{}:{}: {}", \ - __FILE__, \ - __LINE__, \ - fmt::format(format_str, __VA_ARGS__))) +#define NVBENCH_THROW(exception_type, format_str, ...) \ + throw exception_type( \ + fmt::format("{}:{}: {}", __FILE__, __LINE__, fmt::format(format_str, __VA_ARGS__))) -#define NVBENCH_THROW_IF(condition, exception_type, format_str, ...) \ - do \ - { \ - if (condition) \ - { \ - NVBENCH_THROW(exception_type, format_str, __VA_ARGS__); \ - } \ +#define NVBENCH_THROW_IF(condition, exception_type, format_str, ...) \ + do \ + { \ + if (condition) \ + { \ + NVBENCH_THROW(exception_type, format_str, __VA_ARGS__); \ + } \ } while (false) diff --git a/nvbench/detail/transform_reduce.cuh b/nvbench/detail/transform_reduce.cuh index 8bc5db6..5625358 100644 --- a/nvbench/detail/transform_reduce.cuh +++ b/nvbench/detail/transform_reduce.cuh @@ -27,10 +27,7 @@ namespace nvbench::detail { -template +template InitValueT transform_reduce(InIterT first, InIterT last, InitValueT init, diff --git a/nvbench/detail/type_list_impl.cuh b/nvbench/detail/type_list_impl.cuh index d2e498c..8a18aa3 100644 --- a/nvbench/detail/type_list_impl.cuh +++ b/nvbench/detail/type_list_impl.cuh @@ -20,12 +20,10 @@ namespace tl::detail { template -auto size(nvbench::type_list) - -> std::integral_constant; +auto size(nvbench::type_list) -> std::integral_constant; template -auto get(nvbench::type_list) - -> std::tuple_element_t>; +auto get(nvbench::type_list) -> std::tuple_element_t>; template auto concat(nvbench::type_list, nvbench::type_list) @@ -44,9 +42,8 @@ struct prepend_each> template struct prepend_each> { - using cur = decltype(detail::concat(nvbench::type_list{}, TL{})); - using next = - typename detail::prepend_each>::type; + using cur = decltype(detail::concat(nvbench::type_list{}, TL{})); + using next = typename detail::prepend_each>::type; using type = decltype(detail::concat(nvbench::type_list{}, next{})); }; @@ -71,23 +68,20 @@ struct cartesian_product, TLTail...>> template struct cartesian_product>> { - using cur = nvbench::type_list>; - using next = - std::conditional_t>>::type, - nvbench::type_list<>>; + using cur = nvbench::type_list>; + using next = std::conditional_t< + sizeof...(Ts) != 0, + typename detail::cartesian_product>>::type, + nvbench::type_list<>>; using type = decltype(detail::concat(cur{}, next{})); }; template -struct cartesian_product< - nvbench::type_list, TL, TLTail...>> +struct cartesian_product, TL, TLTail...>> { - using tail_prod = - typename detail::cartesian_product>::type; - using cur = typename detail::prepend_each::type; - using next = typename detail::cartesian_product< + using tail_prod = typename detail::cartesian_product>::type; + using cur = typename detail::prepend_each::type; + using next = typename detail::cartesian_product< nvbench::type_list, TL, TLTail...>>::type; using type = decltype(detail::concat(cur{}, next{})); }; diff --git a/nvbench/device_info.cu b/nvbench/device_info.cu index 02c6b97..8edad22 100644 --- a/nvbench/device_info.cu +++ b/nvbench/device_info.cu @@ -65,17 +65,15 @@ void device_info::set_persistence_mode(bool state) #else // NVBENCH_HAS_NVML try { - NVBENCH_NVML_CALL(nvmlDeviceSetPersistenceMode( - m_nvml_device, - state ? NVML_FEATURE_ENABLED : NVML_FEATURE_DISABLED)); + NVBENCH_NVML_CALL( + nvmlDeviceSetPersistenceMode(m_nvml_device, + state ? NVML_FEATURE_ENABLED : NVML_FEATURE_DISABLED)); } catch (nvml::call_failed &e) { if (e.get_error_code() == NVML_ERROR_NOT_SUPPORTED) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Persistence mode is only supported on Linux."); + NVBENCH_THROW(std::runtime_error, "{}", "Persistence mode is only supported on Linux."); } else if (e.get_error_code() == NVML_ERROR_NO_PERMISSION) { @@ -104,30 +102,26 @@ try break; case clock_rate::base: - NVBENCH_NVML_CALL(nvmlDeviceSetGpuLockedClocks( - m_nvml_device, - static_cast(NVML_CLOCK_LIMIT_ID_TDP), - static_cast(NVML_CLOCK_LIMIT_ID_TDP))); + NVBENCH_NVML_CALL( + nvmlDeviceSetGpuLockedClocks(m_nvml_device, + static_cast(NVML_CLOCK_LIMIT_ID_TDP), + static_cast(NVML_CLOCK_LIMIT_ID_TDP))); break; case clock_rate::maximum: { - const auto max_mhz = static_cast( - this->get_sm_default_clock_rate() / (1000 * 1000)); - NVBENCH_NVML_CALL( - nvmlDeviceSetGpuLockedClocks(m_nvml_device, max_mhz, max_mhz)); + const auto max_mhz = + static_cast(this->get_sm_default_clock_rate() / (1000 * 1000)); + NVBENCH_NVML_CALL(nvmlDeviceSetGpuLockedClocks(m_nvml_device, max_mhz, max_mhz)); break; } default: - NVBENCH_THROW(std::runtime_error, - "Unrecognized clock rate: {}", - static_cast(rate)); + NVBENCH_THROW(std::runtime_error, "Unrecognized clock rate: {}", static_cast(rate)); } } catch (nvml::call_failed &e) { - if (e.get_error_code() == NVML_ERROR_NOT_SUPPORTED && - this->get_sm_version() < 700) + if (e.get_error_code() == NVML_ERROR_NOT_SUPPORTED && this->get_sm_version() < 700) { NVBENCH_THROW(std::runtime_error, "GPU clock rates can only be modified for Volta and later. " @@ -156,9 +150,7 @@ catch (nvml::call_failed &e) { if (!is_active()) { - NVBENCH_THROW(std::runtime_error, - "{}", - "get_context is called for inactive device"); + NVBENCH_THROW(std::runtime_error, "{}", "get_context is called for inactive device"); } CUcontext cu_context; diff --git a/nvbench/device_info.cuh b/nvbench/device_info.cuh index 296a2c2..f0694df 100644 --- a/nvbench/device_info.cuh +++ b/nvbench/device_info.cuh @@ -54,10 +54,7 @@ struct device_info [[nodiscard]] int get_id() const { return m_id; } /// @return The name of the device. - [[nodiscard]] std::string_view get_name() const - { - return std::string_view(m_prop.name); - } + [[nodiscard]] std::string_view get_name() const { return std::string_view(m_prop.name); } [[nodiscard]] bool is_active() const { @@ -83,7 +80,6 @@ struct device_info /// @note Requires root / admin privileges. void set_persistence_mode(bool state); - /// Symbolic values for special clock rates enum class clock_rate { @@ -101,10 +97,7 @@ struct device_info void lock_gpu_clocks(clock_rate rate); /// @return The SM version of the current device as (major*100) + (minor*10). - [[nodiscard]] int get_sm_version() const - { - return m_prop.major * 100 + m_prop.minor * 10; - } + [[nodiscard]] int get_sm_version() const { return m_prop.major * 100 + m_prop.minor * 10; } /// @return The PTX version of the current device, e.g. sm_80 returns 800. [[nodiscard]] __forceinline__ int get_ptx_version() const @@ -119,46 +112,25 @@ struct device_info } /// @return The number of physical streaming multiprocessors on this device. - [[nodiscard]] int get_number_of_sms() const - { - return m_prop.multiProcessorCount; - } + [[nodiscard]] int get_number_of_sms() const { return m_prop.multiProcessorCount; } /// @return The maximum number of resident blocks per SM. - [[nodiscard]] int get_max_blocks_per_sm() const - { - return m_prop.maxBlocksPerMultiProcessor; - } + [[nodiscard]] int get_max_blocks_per_sm() const { return m_prop.maxBlocksPerMultiProcessor; } /// @return The maximum number of resident threads per SM. - [[nodiscard]] int get_max_threads_per_sm() const - { - return m_prop.maxThreadsPerMultiProcessor; - } + [[nodiscard]] int get_max_threads_per_sm() const { return m_prop.maxThreadsPerMultiProcessor; } /// @return The maximum number of threads per block. - [[nodiscard]] int get_max_threads_per_block() const - { - return m_prop.maxThreadsPerBlock; - } + [[nodiscard]] int get_max_threads_per_block() const { return m_prop.maxThreadsPerBlock; } /// @return The number of registers per SM. - [[nodiscard]] int get_registers_per_sm() const - { - return m_prop.regsPerMultiprocessor; - } + [[nodiscard]] int get_registers_per_sm() const { return m_prop.regsPerMultiprocessor; } /// @return The number of registers per block. - [[nodiscard]] int get_registers_per_block() const - { - return m_prop.regsPerBlock; - } + [[nodiscard]] int get_registers_per_block() const { return m_prop.regsPerBlock; } /// @return The total number of bytes available in global memory. - [[nodiscard]] std::size_t get_global_memory_size() const - { - return m_prop.totalGlobalMem; - } + [[nodiscard]] std::size_t get_global_memory_size() const { return m_prop.totalGlobalMem; } struct memory_info { @@ -176,10 +148,7 @@ struct device_info } /// @return The width of the global memory bus in bits. - [[nodiscard]] int get_global_memory_bus_width() const - { - return m_prop.memoryBusWidth; - } + [[nodiscard]] int get_global_memory_bus_width() const { return m_prop.memoryBusWidth; } //// @return The global memory bus bandwidth in bytes/sec. [[nodiscard]] std::size_t get_global_memory_bus_bandwidth() const @@ -201,10 +170,7 @@ struct device_info } /// @return The available amount of shared memory in bytes per block. - [[nodiscard]] std::size_t get_shared_memory_per_block() const - { - return m_prop.sharedMemPerBlock; - } + [[nodiscard]] std::size_t get_shared_memory_per_block() const { return m_prop.sharedMemPerBlock; } /// @return True if ECC is enabled on this device. [[nodiscard]] bool get_ecc_state() const { return m_prop.ECCEnabled; } @@ -224,23 +190,11 @@ struct device_info #endif /// @return A cached copy of the device's cudaDeviceProp. - [[nodiscard]] const cudaDeviceProp &get_cuda_device_prop() const - { - return m_prop; - } + [[nodiscard]] const cudaDeviceProp &get_cuda_device_prop() const { return m_prop; } - [[nodiscard]] bool operator<(const device_info &o) const - { - return m_id < o.m_id; - } - [[nodiscard]] bool operator==(const device_info &o) const - { - return m_id == o.m_id; - } - [[nodiscard]] bool operator!=(const device_info &o) const - { - return m_id != o.m_id; - } + [[nodiscard]] bool operator<(const device_info &o) const { return m_id < o.m_id; } + [[nodiscard]] bool operator==(const device_info &o) const { return m_id == o.m_id; } + [[nodiscard]] bool operator!=(const device_info &o) const { return m_id != o.m_id; } private: int m_id; @@ -267,11 +221,10 @@ try { nvbench::detail::device_scope _{dev_id}; cudaFuncAttributes attr{}; - NVBENCH_CUDA_CALL( - cudaFuncGetAttributes(&attr, ((const void*)nvbench::detail::noop_kernel_ptr) )); + NVBENCH_CUDA_CALL(cudaFuncGetAttributes(&attr, ((const void *)nvbench::detail::noop_kernel_ptr))); return attr.ptxVersion * 10; } -catch(...) +catch (...) { // Fail gracefully when no appropriate PTX is found for this device. return -1; } diff --git a/nvbench/device_manager.cuh b/nvbench/device_manager.cuh index 9490775..8e6fe9d 100644 --- a/nvbench/device_manager.cuh +++ b/nvbench/device_manager.cuh @@ -40,10 +40,7 @@ struct device_manager /** * @return The total number of detected CUDA devices. */ - [[nodiscard]] int get_number_of_devices() const - { - return static_cast(m_devices.size()); - } + [[nodiscard]] int get_number_of_devices() const { return static_cast(m_devices.size()); } /** * @return The number of devices actually used by all benchmarks. @@ -57,39 +54,27 @@ struct device_manager /** * @return The device_info object corresponding to `id`. */ - [[nodiscard]] const nvbench::device_info &get_device(int id) - { - return m_devices.at(id); - } + [[nodiscard]] const nvbench::device_info &get_device(int id) { return m_devices.at(id); } /** * @return A vector containing device_info objects for all detected CUDA * devices. */ - [[nodiscard]] const device_info_vector &get_devices() const - { - return m_devices; - } + [[nodiscard]] const device_info_vector &get_devices() const { return m_devices; } /** * @return A vector containing device_info objects for devices that are * actively used by all benchmarks. * @note This is only valid after nvbench::option_parser::parse executes. */ - [[nodiscard]] const device_info_vector &get_used_devices() const - { - return m_used_devices; - } + [[nodiscard]] const device_info_vector &get_used_devices() const { return m_used_devices; } private: device_manager(); friend struct option_parser; - void set_used_devices(device_info_vector devices) - { - m_used_devices = std::move(devices); - } + void set_used_devices(device_info_vector devices) { m_used_devices = std::move(devices); } device_info_vector m_devices; device_info_vector m_used_devices; diff --git a/nvbench/enum_type_list.cuh b/nvbench/enum_type_list.cuh index 6ec529b..614057f 100644 --- a/nvbench/enum_type_list.cuh +++ b/nvbench/enum_type_list.cuh @@ -64,10 +64,7 @@ struct type_strings> return std::to_string(Value); } - static std::string description() - { - return nvbench::demangle>(); - } + static std::string description() { return nvbench::demangle>(); } }; } // namespace nvbench @@ -86,15 +83,13 @@ struct type_strings> * \relatesalso enum_type_list * \relatesalso nvbench::enum_type_list */ -#define NVBENCH_DECLARE_ENUM_TYPE_STRINGS(T, \ - input_generator, \ - description_generator) \ - namespace nvbench \ - { \ - template \ - struct type_strings> \ - { \ - static std::string input_string() { return input_generator(Value); } \ - static std::string description() { return description_generator(Value); } \ - }; \ +#define NVBENCH_DECLARE_ENUM_TYPE_STRINGS(T, input_generator, description_generator) \ + namespace nvbench \ + { \ + template \ + struct type_strings> \ + { \ + static std::string input_string() { return input_generator(Value); } \ + static std::string description() { return description_generator(Value); } \ + }; \ } diff --git a/nvbench/exec_tag.cuh b/nvbench/exec_tag.cuh index b49ed36..466a441 100644 --- a/nvbench/exec_tag.cuh +++ b/nvbench/exec_tag.cuh @@ -31,16 +31,16 @@ enum class exec_flag none = 0x0, // Modifiers: - timer = 0x01, // KernelLauncher uses manual timing - no_block = 0x02, // Disables use of `blocking_kernel`. - sync = 0x04, // KernelLauncher has indicated that it will sync - run_once = 0x08, // Only run the benchmark once (for profiling). + timer = 0x01, // KernelLauncher uses manual timing + no_block = 0x02, // Disables use of `blocking_kernel`. + sync = 0x04, // KernelLauncher has indicated that it will sync + run_once = 0x08, // Only run the benchmark once (for profiling). modifier_mask = timer | no_block | sync | run_once, // Measurement types: - cold = 0x0100, // measure_hot - hot = 0x0200, // measure_cold - measure_mask = cold | hot + cold = 0x0100, // measure_hot + hot = 0x0200, // measure_cold + measure_mask = cold | hot }; } // namespace nvbench::detail @@ -120,7 +120,6 @@ constexpr inline auto timer = nvbench::exec_tag::impl::timer; /// Modifier used to indicate that the KernelGenerator will perform CUDA /// synchronizations. Without this flag such benchmarks will deadlock. -constexpr inline auto sync = nvbench::exec_tag::impl::no_block | - nvbench::exec_tag::impl::sync; +constexpr inline auto sync = nvbench::exec_tag::impl::no_block | nvbench::exec_tag::impl::sync; } // namespace nvbench::exec_tag diff --git a/nvbench/flags.cuh b/nvbench/flags.cuh index 30ba84e..cefefa3 100644 --- a/nvbench/flags.cuh +++ b/nvbench/flags.cuh @@ -20,24 +20,24 @@ #include -#define NVBENCH_DECLARE_FLAGS(T) \ - constexpr inline T operator|(T v1, T v2) \ - { \ - using UT = std::underlying_type_t; \ - return static_cast(static_cast(v1) | static_cast(v2)); \ - } \ - constexpr inline T operator&(T v1, T v2) \ - { \ - using UT = std::underlying_type_t; \ - return static_cast(static_cast(v1) & static_cast(v2)); \ - } \ - constexpr inline T operator^(T v1, T v2) \ - { \ - using UT = std::underlying_type_t; \ - return static_cast(static_cast(v1) ^ static_cast(v2)); \ - } \ - constexpr inline T operator~(T v1) \ - { \ - using UT = std::underlying_type_t; \ - return static_cast(~static_cast(v1)); \ +#define NVBENCH_DECLARE_FLAGS(T) \ + constexpr inline T operator|(T v1, T v2) \ + { \ + using UT = std::underlying_type_t; \ + return static_cast(static_cast(v1) | static_cast(v2)); \ + } \ + constexpr inline T operator&(T v1, T v2) \ + { \ + using UT = std::underlying_type_t; \ + return static_cast(static_cast(v1) & static_cast(v2)); \ + } \ + constexpr inline T operator^(T v1, T v2) \ + { \ + using UT = std::underlying_type_t; \ + return static_cast(static_cast(v1) ^ static_cast(v2)); \ + } \ + constexpr inline T operator~(T v1) \ + { \ + using UT = std::underlying_type_t; \ + return static_cast(~static_cast(v1)); \ } diff --git a/nvbench/float64_axis.cuh b/nvbench/float64_axis.cuh index 0d60651..947b581 100644 --- a/nvbench/float64_axis.cuh +++ b/nvbench/float64_axis.cuh @@ -36,20 +36,11 @@ struct float64_axis final : public axis_base ~float64_axis() final; - void set_inputs(std::vector inputs) - { - m_values = std::move(inputs); - } - [[nodiscard]] nvbench::float64_t get_value(std::size_t i) const - { - return m_values[i]; - } + void set_inputs(std::vector inputs) { m_values = std::move(inputs); } + [[nodiscard]] nvbench::float64_t get_value(std::size_t i) const { return m_values[i]; } private: - std::unique_ptr do_clone() const - { - return std::make_unique(*this); - } + std::unique_ptr do_clone() const { return std::make_unique(*this); } std::size_t do_get_size() const final { return m_values.size(); } std::string do_get_input_string(std::size_t i) const final; std::string do_get_description(std::size_t i) const final; diff --git a/nvbench/git_revision.cuh b/nvbench/git_revision.cuh index 2b29e92..50fc9da 100644 --- a/nvbench/git_revision.cuh +++ b/nvbench/git_revision.cuh @@ -1,20 +1,20 @@ /* -* Copyright 2021 NVIDIA Corporation -* -* Licensed under the Apache License, Version 2.0 with the LLVM exception -* (the "License"); you may not use this file except in compliance with -* the License. -* -* You may obtain a copy of the License at -* -* http://llvm.org/foundation/relicensing/LICENSE.txt -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once @@ -25,7 +25,5 @@ #define NVBENCH_GIT_SHA1 NVBench_GIT_SHA1 #define NVBENCH_GIT_VERSION NVBench_GIT_VERSION #ifdef NVBench_GIT_IS_DIRTY -# define NVBENCH_GIT_IS_DIRTY +#define NVBENCH_GIT_IS_DIRTY #endif - - diff --git a/nvbench/int64_axis.cuh b/nvbench/int64_axis.cuh index a6cec2e..baa7641 100644 --- a/nvbench/int64_axis.cuh +++ b/nvbench/int64_axis.cuh @@ -58,28 +58,18 @@ struct int64_axis final : public axis_base return static_cast(m_flags & int64_axis_flags::power_of_two); } - void set_inputs(std::vector inputs, - int64_axis_flags flags = int64_axis_flags::none); + void set_inputs(std::vector inputs, int64_axis_flags flags = int64_axis_flags::none); - [[nodiscard]] const std::vector &get_inputs() const - { - return m_inputs; - }; + [[nodiscard]] const std::vector &get_inputs() const { return m_inputs; }; [[nodiscard]] int64_t get_value(std::size_t i) const { return m_values[i]; }; - [[nodiscard]] const std::vector &get_values() const - { - return m_values; - }; + [[nodiscard]] const std::vector &get_values() const { return m_values; }; int64_axis_flags get_flags() const { return m_flags; } // Helper functions for pow2 conversions: - static nvbench::int64_t compute_pow2(nvbench::int64_t exponent) - { - return 1ll << exponent; - } + static nvbench::int64_t compute_pow2(nvbench::int64_t exponent) { return 1ll << exponent; } // UB if value < 0. static nvbench::int64_t compute_log2(nvbench::int64_t value) @@ -95,10 +85,7 @@ struct int64_axis final : public axis_base }; private: - std::unique_ptr do_clone() const - { - return std::make_unique(*this); - } + std::unique_ptr do_clone() const { return std::make_unique(*this); } std::size_t do_get_size() const final { return m_inputs.size(); } std::string do_get_input_string(std::size_t) const final; std::string do_get_description(std::size_t) const final; diff --git a/nvbench/int64_axis.cxx b/nvbench/int64_axis.cxx index 24ff913..599c388 100644 --- a/nvbench/int64_axis.cxx +++ b/nvbench/int64_axis.cxx @@ -66,9 +66,8 @@ std::string int64_axis::do_get_input_string(std::size_t i) const std::string int64_axis::do_get_description(std::size_t i) const { - return this->is_power_of_two() - ? fmt::format("2^{} = {}", m_inputs[i], m_values[i]) - : std::string{}; + return this->is_power_of_two() ? fmt::format("2^{} = {}", m_inputs[i], m_values[i]) + : std::string{}; } std::string_view int64_axis::do_get_flags_as_string() const diff --git a/nvbench/internal/markdown_table.cuh b/nvbench/internal/markdown_table.cuh index bb721c6..518f57b 100644 --- a/nvbench/internal/markdown_table.cuh +++ b/nvbench/internal/markdown_table.cuh @@ -85,8 +85,7 @@ private: " {:^{}} ", col.header, col.max_width); - iter = - fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); + iter = fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); } return fmt::format_to(iter, "\n"); } @@ -102,8 +101,7 @@ private: "{:-^{}}", "", col.max_width + 2); - iter = - fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); + iter = fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); } return fmt::format_to(iter, "\n"); } @@ -116,8 +114,7 @@ private: for (std::size_t row = 0; row < m_num_rows; ++row) { - iter = - fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); + iter = fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); for (const column &col : m_columns) { iter = fmt::format_to(iter, @@ -125,8 +122,7 @@ private: " {:>{}} ", col.rows[row], col.max_width); - iter = - fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); + iter = fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); } // cols iter = fmt::format_to(iter, "\n"); diff --git a/nvbench/internal/nvml.cuh b/nvbench/internal/nvml.cuh index 497f31a..963c381 100644 --- a/nvbench/internal/nvml.cuh +++ b/nvbench/internal/nvml.cuh @@ -74,10 +74,7 @@ struct call_failed : error [[nodiscard]] nvmlReturn_t get_error_code() const { return m_error_code; } - [[nodiscard]] const std::string &get_error_string() const - { - return m_error_string; - } + [[nodiscard]] const std::string &get_error_string() const { return m_error_string; } private: nvmlReturn_t m_error_code; @@ -90,30 +87,26 @@ private: #ifdef NVBENCH_HAS_NVML -#define NVBENCH_NVML_CALL(call) \ - do \ - { \ - const auto _rr = call; \ - if (_rr != NVML_SUCCESS) \ - { \ - throw nvbench::nvml::call_failed(__FILE__, \ - __LINE__, \ - #call, \ - _rr, \ - nvmlErrorString(_rr)); \ - } \ +#define NVBENCH_NVML_CALL(call) \ + do \ + { \ + const auto _rr = call; \ + if (_rr != NVML_SUCCESS) \ + { \ + throw nvbench::nvml::call_failed(__FILE__, __LINE__, #call, _rr, nvmlErrorString(_rr)); \ + } \ } while (false) // Same as above, but used for nvmlInit(), where a failure means that // nvmlErrorString is not available. -#define NVBENCH_NVML_CALL_NO_API(call) \ - do \ - { \ - const auto _rr = call; \ - if (_rr != NVML_SUCCESS) \ - { \ - throw nvbench::nvml::call_failed(__FILE__, __LINE__, #call, _rr, ""); \ - } \ +#define NVBENCH_NVML_CALL_NO_API(call) \ + do \ + { \ + const auto _rr = call; \ + if (_rr != NVML_SUCCESS) \ + { \ + throw nvbench::nvml::call_failed(__FILE__, __LINE__, #call, _rr, ""); \ + } \ } while (false) #endif // NVBENCH_HAS_NVML diff --git a/nvbench/internal/table_builder.cuh b/nvbench/internal/table_builder.cuh index 81fca0a..ae029b4 100644 --- a/nvbench/internal/table_builder.cuh +++ b/nvbench/internal/table_builder.cuh @@ -47,18 +47,14 @@ struct table_builder const std::string &header, std::string value) { - auto iter = std::find_if(m_columns.begin(), - m_columns.end(), - [&column_key](const column &col) { - return col.key == column_key; - }); + auto iter = std::find_if(m_columns.begin(), m_columns.end(), [&column_key](const column &col) { + return col.key == column_key; + }); auto &col = iter == m_columns.end() - ? m_columns.emplace_back(column{column_key, - header, - std::vector{}, - header.size()}) - : *iter; + ? m_columns.emplace_back( + column{column_key, header, std::vector{}, header.size()}) + : *iter; col.max_width = std::max(col.max_width, value.size()); if (col.rows.size() <= row) @@ -76,11 +72,9 @@ struct table_builder std::size_t{}, [](const auto &a, const auto &b) { return a > b ? a : b; }, [](const column &col) { return col.rows.size(); }); - std::for_each(m_columns.begin(), - m_columns.end(), - [num_rows = m_num_rows](column &col) { - col.rows.resize(num_rows); - }); + std::for_each(m_columns.begin(), m_columns.end(), [num_rows = m_num_rows](column &col) { + col.rows.resize(num_rows); + }); } }; diff --git a/nvbench/json_printer.cu b/nvbench/json_printer.cu index 7c99f2a..b78e5be 100644 --- a/nvbench/json_printer.cu +++ b/nvbench/json_printer.cu @@ -126,11 +126,10 @@ std::string json_printer::version_t::get_string() const return fmt::format("{}.{}.{}", this->major, this->minor, this->patch); } -void json_printer::do_process_bulk_data_float64( - state &state, - const std::string &tag, - const std::string &hint, - const std::vector &data) +void json_printer::do_process_bulk_data_float64(state &state, + const std::string &tag, + const std::string &hint, + const std::vector &data) { printer_base::do_process_bulk_data_float64(state, tag, hint, data); @@ -157,16 +156,12 @@ void json_printer::do_process_bulk_data_float64( { if (!fs::create_directory(result_path)) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Failed to create result directory '{}'."); + NVBENCH_THROW(std::runtime_error, "{}", "Failed to create result directory '{}'."); } } else if (!fs::is_directory(result_path)) { - NVBENCH_THROW(std::runtime_error, - "{}", - "'{}' exists and is not a directory."); + NVBENCH_THROW(std::runtime_error, "{}", "'{}' exists and is not a directory."); } const auto file_id = m_num_jsonbin_files++; @@ -197,16 +192,12 @@ void json_printer::do_process_bulk_data_float64( } catch (std::exception &e) { - if (auto printer_opt_ref = state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); - printer.log(nvbench::log_level::warn, - fmt::format("Error writing {} ({}) to {}: {}", - tag, - hint, - result_path.string(), - e.what())); + printer.log( + nvbench::log_level::warn, + fmt::format("Error writing {} ({}) to {}: {}", tag, hint, result_path.string(), e.what())); } } // end catch @@ -221,14 +212,12 @@ void json_printer::do_process_bulk_data_float64( summ.set_string("hide", "Not needed in table."); timer.stop(); - if (auto printer_opt_ref = state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); - printer.log(nvbench::log_level::info, - fmt::format("Wrote '{}' in {:>6.3f}ms", - result_path.string(), - timer.get_duration() * 1000)); + printer.log( + nvbench::log_level::info, + fmt::format("Wrote '{}' in {:>6.3f}ms", result_path.string(), timer.get_duration() * 1000)); } } // end hint == sample_times } @@ -304,15 +293,12 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) device["global_memory_size"] = dev_info.get_global_memory_size(); device["global_memory_bus_peak_clock_rate"] = dev_info.get_global_memory_bus_peak_clock_rate(); - device["global_memory_bus_width"] = - dev_info.get_global_memory_bus_width(); - device["global_memory_bus_bandwidth"] = - dev_info.get_global_memory_bus_bandwidth(); - device["l2_cache_size"] = dev_info.get_l2_cache_size(); - device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm(); - device["shared_memory_per_block"] = - dev_info.get_shared_memory_per_block(); - device["ecc_state"] = dev_info.get_ecc_state(); + device["global_memory_bus_width"] = dev_info.get_global_memory_bus_width(); + device["global_memory_bus_bandwidth"] = dev_info.get_global_memory_bus_bandwidth(); + device["l2_cache_size"] = dev_info.get_l2_cache_size(); + device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm(); + device["shared_memory_per_block"] = dev_info.get_shared_memory_per_block(); + device["ecc_state"] = dev_info.get_ecc_state(); } } // "devices" @@ -358,23 +344,19 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) switch (axis_ptr->get_type()) { case nvbench::axis_type::type: - value["is_active"] = - static_cast(*axis_ptr).get_is_active(i); + value["is_active"] = static_cast(*axis_ptr).get_is_active(i); break; case nvbench::axis_type::int64: - value["value"] = - static_cast(*axis_ptr).get_value(i); + value["value"] = static_cast(*axis_ptr).get_value(i); break; case nvbench::axis_type::float64: - value["value"] = - static_cast(*axis_ptr).get_value(i); + value["value"] = static_cast(*axis_ptr).get_value(i); break; case nvbench::axis_type::string: - value["value"] = - static_cast(*axis_ptr).get_value(i); + value["value"] = static_cast(*axis_ptr).get_value(i); break; default: break; diff --git a/nvbench/json_printer.cuh b/nvbench/json_printer.cuh index a58448f..394efb0 100644 --- a/nvbench/json_printer.cuh +++ b/nvbench/json_printer.cuh @@ -38,9 +38,7 @@ struct json_printer : nvbench::printer_base { using printer_base::printer_base; - json_printer(std::ostream &stream, - std::string stream_name, - bool enable_binary_output) + json_printer(std::ostream &stream, std::string stream_name, bool enable_binary_output) : printer_base(stream, std::move(stream_name)) , m_enable_binary_output{enable_binary_output} {} @@ -59,23 +57,16 @@ struct json_printer : nvbench::printer_base [[nodiscard]] static version_t get_json_file_version(); - [[nodiscard]] bool get_enable_binary_output() const - { - return m_enable_binary_output; - } + [[nodiscard]] bool get_enable_binary_output() const { return m_enable_binary_output; } void set_enable_binary_output(bool b) { m_enable_binary_output = b; } protected: // Virtual API from printer_base: - void do_log_argv(const std::vector& argv) override - { - m_argv = argv; - } - void do_process_bulk_data_float64( - nvbench::state &state, - const std::string &tag, - const std::string &hint, - const std::vector &data) override; + void do_log_argv(const std::vector &argv) override { m_argv = argv; } + void do_process_bulk_data_float64(nvbench::state &state, + const std::string &tag, + const std::string &hint, + const std::vector &data) override; void do_print_benchmark_results(const benchmark_vector &benches) override; bool m_enable_binary_output{false}; diff --git a/nvbench/launch.cuh b/nvbench/launch.cuh index 4b973f3..0cb4961 100644 --- a/nvbench/launch.cuh +++ b/nvbench/launch.cuh @@ -42,20 +42,17 @@ struct launch {} // move-only - launch(const launch &) = delete; - launch(launch &&) = default; + launch(const launch &) = delete; + launch(launch &&) = default; launch &operator=(const launch &) = delete; - launch &operator=(launch &&) = default; + launch &operator=(launch &&) = default; /** * @return a CUDA stream that all kernels and other stream-ordered CUDA work * must use. This stream can be changed by the `KernelGenerator` using the * `nvbench::state::set_cuda_stream` method. */ - __forceinline__ const nvbench::cuda_stream &get_stream() const - { - return m_stream; - }; + __forceinline__ const nvbench::cuda_stream &get_stream() const { return m_stream; }; private: // The stream is owned by the `nvbench::state` associated with this launch. diff --git a/nvbench/main.cuh b/nvbench/main.cuh index 4c1588c..0ba82d7 100644 --- a/nvbench/main.cuh +++ b/nvbench/main.cuh @@ -27,23 +27,23 @@ #include -#define NVBENCH_MAIN \ - int main(int argc, char const *const *argv) \ - try \ - { \ - NVBENCH_MAIN_BODY(argc, argv); \ - NVBENCH_CUDA_CALL(cudaDeviceReset()); \ - return 0; \ - } \ - catch (std::exception & e) \ - { \ - std::cerr << "\nNVBench encountered an error:\n\n" << e.what() << "\n"; \ - return 1; \ - } \ - catch (...) \ - { \ - std::cerr << "\nNVBench encountered an unknown error.\n"; \ - return 1; \ +#define NVBENCH_MAIN \ + int main(int argc, char const *const *argv) \ + try \ + { \ + NVBENCH_MAIN_BODY(argc, argv); \ + NVBENCH_CUDA_CALL(cudaDeviceReset()); \ + return 0; \ + } \ + catch (std::exception & e) \ + { \ + std::cerr << "\nNVBench encountered an error:\n\n" << e.what() << "\n"; \ + return 1; \ + } \ + catch (...) \ + { \ + std::cerr << "\nNVBench encountered an unknown error.\n"; \ + return 1; \ } #ifdef NVBENCH_HAS_CUPTI @@ -54,35 +54,35 @@ // clang-format on #endif -#define NVBENCH_MAIN_PARSE(argc, argv) \ - nvbench::option_parser parser; \ +#define NVBENCH_MAIN_PARSE(argc, argv) \ + nvbench::option_parser parser; \ parser.parse(argc, argv) -#define NVBENCH_MAIN_BODY(argc, argv) \ - do \ - { \ - NVBENCH_INITIALIZE_DRIVER_API; \ - NVBENCH_MAIN_PARSE(argc, argv); \ - auto &printer = parser.get_printer(); \ - \ - printer.print_device_info(); \ - printer.print_log_preamble(); \ - auto &benchmarks = parser.get_benchmarks(); \ - \ - std::size_t total_states = 0; \ - for (auto &bench_ptr : benchmarks) \ - { \ - total_states += bench_ptr->get_config_count(); \ - } \ - printer.set_total_state_count(total_states); \ - \ - printer.set_completed_state_count(0); \ - for (auto &bench_ptr : benchmarks) \ - { \ - bench_ptr->set_printer(printer); \ - bench_ptr->run(); \ - bench_ptr->clear_printer(); \ - } \ - printer.print_log_epilogue(); \ - printer.print_benchmark_results(benchmarks); \ +#define NVBENCH_MAIN_BODY(argc, argv) \ + do \ + { \ + NVBENCH_INITIALIZE_DRIVER_API; \ + NVBENCH_MAIN_PARSE(argc, argv); \ + auto &printer = parser.get_printer(); \ + \ + printer.print_device_info(); \ + printer.print_log_preamble(); \ + auto &benchmarks = parser.get_benchmarks(); \ + \ + std::size_t total_states = 0; \ + for (auto &bench_ptr : benchmarks) \ + { \ + total_states += bench_ptr->get_config_count(); \ + } \ + printer.set_total_state_count(total_states); \ + \ + printer.set_completed_state_count(0); \ + for (auto &bench_ptr : benchmarks) \ + { \ + bench_ptr->set_printer(printer); \ + bench_ptr->run(); \ + bench_ptr->clear_printer(); \ + } \ + printer.print_log_epilogue(); \ + printer.print_benchmark_results(benchmarks); \ } while (false) diff --git a/nvbench/markdown_printer.cu b/nvbench/markdown_printer.cu index 276ca86..94151d4 100644 --- a/nvbench/markdown_printer.cu +++ b/nvbench/markdown_printer.cu @@ -44,9 +44,8 @@ void markdown_printer::do_print_device_info() fmt::format_to(buffer, "# Devices\n\n"); const auto &device_mgr = nvbench::device_manager::get(); - const auto &devices = device_mgr.get_number_of_used_devices() > 0 - ? device_mgr.get_used_devices() - : device_mgr.get_devices(); + const auto &devices = device_mgr.get_number_of_used_devices() > 0 ? device_mgr.get_used_devices() + : device_mgr.get_devices(); for (const auto &device : devices) { const auto [gmem_free, gmem_used] = device.get_global_memory_usage(); @@ -64,22 +63,17 @@ void markdown_printer::do_print_device_info() "* Global Memory: {} MiB Free / {} MiB Total\n", gmem_free / 1024 / 1024, gmem_used / 1024 / 1024); - fmt::format_to( - buffer, - "* Global Memory Bus Peak: {} GB/sec ({}-bit DDR @{}MHz)\n", - device.get_global_memory_bus_bandwidth() / 1000 / 1000 / 1000, - device.get_global_memory_bus_width(), - device.get_global_memory_bus_peak_clock_rate() / 1000 / 1000); + fmt::format_to(buffer, + "* Global Memory Bus Peak: {} GB/sec ({}-bit DDR @{}MHz)\n", + device.get_global_memory_bus_bandwidth() / 1000 / 1000 / 1000, + device.get_global_memory_bus_width(), + device.get_global_memory_bus_peak_clock_rate() / 1000 / 1000); fmt::format_to(buffer, "* Max Shared Memory: {} KiB/SM, {} KiB/Block\n", device.get_shared_memory_per_sm() / 1024, device.get_shared_memory_per_block() / 1024); - fmt::format_to(buffer, - "* L2 Cache Size: {} KiB\n", - device.get_l2_cache_size() / 1024); - fmt::format_to(buffer, - "* Maximum Active Blocks: {}/SM\n", - device.get_max_blocks_per_sm()); + fmt::format_to(buffer, "* L2 Cache Size: {} KiB\n", device.get_l2_cache_size() / 1024); + fmt::format_to(buffer, "* Maximum Active Blocks: {}/SM\n", device.get_max_blocks_per_sm()); fmt::format_to(buffer, "* Maximum Active Threads: {}/SM, {}/Block\n", device.get_max_threads_per_sm(), @@ -88,18 +82,13 @@ void markdown_printer::do_print_device_info() "* Available Registers: {}/SM, {}/Block\n", device.get_registers_per_sm(), device.get_registers_per_block()); - fmt::format_to(buffer, - "* ECC Enabled: {}\n", - device.get_ecc_state() ? "Yes" : "No"); + fmt::format_to(buffer, "* ECC Enabled: {}\n", device.get_ecc_state() ? "Yes" : "No"); fmt::format_to(buffer, "\n"); } m_ostream << fmt::to_string(buffer); } -void markdown_printer::do_print_log_preamble() -{ - m_ostream << "# Log\n\n```\n"; -} +void markdown_printer::do_print_log_preamble() { m_ostream << "# Log\n\n```\n"; } void markdown_printer::do_print_log_epilogue() { m_ostream << "```\n\n"; } @@ -147,8 +136,7 @@ void markdown_printer::do_log_run_state(const nvbench::state &exec_state) { if (m_total_state_count == 0) { // No progress info - this->log(nvbench::log_level::run, - exec_state.get_short_description(m_color)); + this->log(nvbench::log_level::run, exec_state.get_short_description(m_color)); } else { // Add progress @@ -160,8 +148,7 @@ void markdown_printer::do_log_run_state(const nvbench::state &exec_state) } } -void markdown_printer::do_print_benchmark_list( - const printer_base::benchmark_vector &benches) +void markdown_printer::do_print_benchmark_list(const printer_base::benchmark_vector &benches) { if (benches.empty()) { @@ -204,10 +191,7 @@ void markdown_printer::do_print_benchmark_list( { desc = fmt::format(" ({})", desc); } - fmt::format_to(buffer, - " * `{}`{}\n", - axis_ptr->get_input_string(i), - desc); + fmt::format_to(buffer, " * `{}`{}\n", axis_ptr->get_input_string(i), desc); } // end foreach value } // end foreach axis fmt::format_to(buffer, "\n"); @@ -216,8 +200,7 @@ void markdown_printer::do_print_benchmark_list( m_ostream << fmt::to_string(buffer); } -void markdown_printer::do_print_benchmark_results( - const printer_base::benchmark_vector &benches) +void markdown_printer::do_print_benchmark_results(const printer_base::benchmark_vector &benches) { auto format_visitor = [](const auto &v) { using T = std::decay_t; @@ -252,19 +235,15 @@ void markdown_printer::do_print_benchmark_results( // Do a single pass when no devices are specified. This happens for // benchmarks with `cpu` exec_tags. const std::size_t num_device_passes = devices.empty() ? 1 : devices.size(); - for (std::size_t device_pass = 0; device_pass < num_device_passes; - ++device_pass) + for (std::size_t device_pass = 0; device_pass < num_device_passes; ++device_pass) { - std::optional device = - devices.empty() ? std::nullopt - : std::make_optional(devices[device_pass]); + std::optional device = devices.empty() + ? std::nullopt + : std::make_optional(devices[device_pass]); if (device) { - fmt::format_to(buffer, - "\n### [{}] {}\n\n", - device->get_id(), - device->get_name()); + fmt::format_to(buffer, "\n### [{}] {}\n\n", device->get_id(), device->get_name()); } std::size_t row = 0; @@ -288,15 +267,11 @@ void markdown_printer::do_print_benchmark_results( { const nvbench::int64_t value = axis_values.get_int64(name); const nvbench::int64_t exponent = int64_axis::compute_log2(value); - table.add_cell(row, - name, - name, - fmt::format("2^{} = {}", exponent, value)); + table.add_cell(row, name, name, fmt::format("2^{} = {}", exponent, value)); } else { - std::string value = std::visit(format_visitor, - axis_values.get_value(name)); + std::string value = std::visit(format_visitor, axis_values.get_value(name)); table.add_cell(row, name + "_axis", name, std::move(value)); } } @@ -308,12 +283,9 @@ void markdown_printer::do_print_benchmark_results( continue; } const std::string &tag = summ.get_tag(); - const std::string &header = summ.has_value("name") - ? summ.get_string("name") - : tag; + const std::string &header = summ.has_value("name") ? summ.get_string("name") : tag; - std::string hint = summ.has_value("hint") ? summ.get_string("hint") - : std::string{}; + std::string hint = summ.has_value("hint") ? summ.get_string("hint") : std::string{}; if (hint == "duration") { table.add_cell(row, tag, header, this->do_format_duration(summ)); @@ -332,10 +304,7 @@ void markdown_printer::do_print_benchmark_results( } else if (hint == "sample_size") { - table.add_cell(row, - tag, - header, - this->do_format_sample_size(summ)); + table.add_cell(row, tag, header, this->do_format_sample_size(summ)); } else if (hint == "percentage") { @@ -353,8 +322,7 @@ void markdown_printer::do_print_benchmark_results( auto table_str = table.to_string(); fmt::format_to(buffer, "{}", - table_str.empty() ? "No data -- check log.\n" - : std::move(table_str)); + table_str.empty() ? "No data -- check log.\n" : std::move(table_str)); } // end foreach device_pass } diff --git a/nvbench/named_values.cuh b/nvbench/named_values.cuh index c11dab4..1ce5185 100644 --- a/nvbench/named_values.cuh +++ b/nvbench/named_values.cuh @@ -33,8 +33,7 @@ namespace nvbench */ struct named_values { - using value_type = - std::variant; + using value_type = std::variant; enum class type { @@ -43,7 +42,7 @@ struct named_values string }; - void append(const named_values& other); + void append(const named_values &other); [[nodiscard]] std::size_t get_size() const; [[nodiscard]] std::vector get_names() const; @@ -60,11 +59,11 @@ struct named_values [[nodiscard]] type get_type(const std::string &name) const; [[nodiscard]] bool has_value(const std::string &name) const; - [[nodiscard]] const value_type& get_value(const std::string &name) const; + [[nodiscard]] const value_type &get_value(const std::string &name) const; void clear(); - void remove_value(const std::string& name); + void remove_value(const std::string &name); private: struct named_value diff --git a/nvbench/named_values.cxx b/nvbench/named_values.cxx index 1aeb4dc..605789e 100644 --- a/nvbench/named_values.cxx +++ b/nvbench/named_values.cxx @@ -33,9 +33,7 @@ namespace nvbench void named_values::append(const named_values &other) { - m_storage.insert(m_storage.end(), - other.m_storage.cbegin(), - other.m_storage.cend()); + m_storage.insert(m_storage.end(), other.m_storage.cbegin(), other.m_storage.cend()); } void named_values::clear() { m_storage.clear(); } @@ -55,20 +53,17 @@ std::vector named_values::get_names() const bool named_values::has_value(const std::string &name) const { - auto iter = - std::find_if(m_storage.cbegin(), - m_storage.cend(), - [&name](const auto &val) { return val.name == name; }); + auto iter = std::find_if(m_storage.cbegin(), m_storage.cend(), [&name](const auto &val) { + return val.name == name; + }); return iter != m_storage.cend(); } -const named_values::value_type & -named_values::get_value(const std::string &name) const +const named_values::value_type &named_values::get_value(const std::string &name) const { - auto iter = - std::find_if(m_storage.cbegin(), - m_storage.cend(), - [&name](const auto &val) { return val.name == name; }); + auto iter = std::find_if(m_storage.cbegin(), m_storage.cend(), [&name](const auto &val) { + return val.name == name; + }); if (iter == m_storage.cend()) { NVBENCH_THROW(std::runtime_error, "No value with name '{}'.", name); @@ -96,9 +91,7 @@ named_values::type named_values::get_type(const std::string &name) const // warning C4702: unreachable code // This is a future-proofing check, it'll be reachable if something breaks NVBENCH_MSVC_PUSH_DISABLE_WARNING(4702) - NVBENCH_THROW(std::runtime_error, - "Unknown variant type for entry '{}'.", - name); + NVBENCH_THROW(std::runtime_error, "Unknown variant type for entry '{}'.", name); }, this->get_value(name)); NVBENCH_MSVC_POP_WARNING() @@ -111,10 +104,7 @@ try } catch (std::exception &err) { - NVBENCH_THROW(std::runtime_error, - "Error looking up int64 value `{}`:\n{}", - name, - err.what()); + NVBENCH_THROW(std::runtime_error, "Error looking up int64 value `{}`:\n{}", name, err.what()); } nvbench::float64_t named_values::get_float64(const std::string &name) const @@ -124,10 +114,7 @@ try } catch (std::exception &err) { - NVBENCH_THROW(std::runtime_error, - "Error looking up float64 value `{}`:\n{}", - name, - err.what()); + NVBENCH_THROW(std::runtime_error, "Error looking up float64 value `{}`:\n{}", name, err.what()); } const std::string &named_values::get_string(const std::string &name) const @@ -137,10 +124,7 @@ try } catch (std::exception &err) { - NVBENCH_THROW(std::runtime_error, - "Error looking up string value `{}`:\n{}", - name, - err.what()); + NVBENCH_THROW(std::runtime_error, "Error looking up string value `{}`:\n{}", name, err.what()); } void named_values::set_int64(std::string name, nvbench::int64_t value) @@ -165,10 +149,9 @@ void named_values::set_value(std::string name, named_values::value_type value) void named_values::remove_value(const std::string &name) { - auto iter = - std::find_if(m_storage.begin(), m_storage.end(), [&name](const auto &val) { - return val.name == name; - }); + auto iter = std::find_if(m_storage.begin(), m_storage.end(), [&name](const auto &val) { + return val.name == name; + }); if (iter != m_storage.end()) { m_storage.erase(iter); diff --git a/nvbench/option_parser.cu b/nvbench/option_parser.cu index 55f7f1c..744479d 100644 --- a/nvbench/option_parser.cu +++ b/nvbench/option_parser.cu @@ -82,20 +82,11 @@ std::string_view submatch_to_sv(const sv_submatch &in) // // So we're stuck with materializing a std::string and calling std::stoX(). Ah // well. At least it's not istream. -void parse(std::string_view input, nvbench::int32_t &val) -{ - val = std::stoi(std::string(input)); -} +void parse(std::string_view input, nvbench::int32_t &val) { val = std::stoi(std::string(input)); } -void parse(std::string_view input, nvbench::int64_t &val) -{ - val = std::stoll(std::string(input)); -} +void parse(std::string_view input, nvbench::int64_t &val) { val = std::stoll(std::string(input)); } -void parse(std::string_view input, nvbench::float64_t &val) -{ - val = std::stod(std::string(input)); -} +void parse(std::string_view input, nvbench::float64_t &val) { val = std::stod(std::string(input)); } void parse(std::string_view input, std::string &val) { val = input; } @@ -112,9 +103,8 @@ std::vector parse_list_values(std::string_view list_spec) "(?:,|$)" // Delimiters }; - auto values_begin = - sv_regex_iterator(list_spec.cbegin(), list_spec.cend(), value_regex); - auto values_end = sv_regex_iterator{}; + auto values_begin = sv_regex_iterator(list_spec.cbegin(), list_spec.cend(), value_regex); + auto values_end = sv_regex_iterator{}; while (values_begin != values_end) { auto match = *values_begin++; @@ -131,8 +121,7 @@ std::vector parse_list_values(std::string_view list_spec) // Parses a range specification " : [ : ]" and returns // a vector filled with the specified range. template -std::vector parse_range_values(std::string_view range_spec, - nvbench::wrapped_type) +std::vector parse_range_values(std::string_view range_spec, nvbench::wrapped_type) { std::vector range_params; @@ -143,9 +132,8 @@ std::vector parse_range_values(std::string_view range_spec, "(?:$|:)" // Delimiters }; - auto values_begin = - sv_regex_iterator(range_spec.cbegin(), range_spec.cend(), value_regex); - auto values_end = sv_regex_iterator{}; + auto values_begin = sv_regex_iterator(range_spec.cbegin(), range_spec.cend(), value_regex); + auto values_end = sv_regex_iterator{}; for (; values_begin != values_end; ++values_begin) { auto match = *values_begin; @@ -221,25 +209,15 @@ std::vector parse_values(std::string_view value_spec) "$"}; // EOS sv_match match; - if (std::regex_search(value_spec.cbegin(), - value_spec.cend(), - match, - list_regex)) + if (std::regex_search(value_spec.cbegin(), value_spec.cend(), match, list_regex)) { return parse_list_values(submatch_to_sv(match[1])); } - else if (std::regex_search(value_spec.cbegin(), - value_spec.cend(), - match, - range_regex)) + else if (std::regex_search(value_spec.cbegin(), value_spec.cend(), match, range_regex)) { - return parse_range_values(submatch_to_sv(match[1]), - nvbench::wrapped_type{}); + return parse_range_values(submatch_to_sv(match[1]), nvbench::wrapped_type{}); } - else if (std::regex_search(value_spec.cbegin(), - value_spec.cend(), - match, - single_regex)) + else if (std::regex_search(value_spec.cbegin(), value_spec.cend(), match, single_regex)) { T val; parse(submatch_to_sv(match[1]), val); @@ -247,9 +225,7 @@ std::vector parse_values(std::string_view value_spec) } else { - NVBENCH_THROW(std::runtime_error, - "Invalid axis value spec: {}", - value_spec); + NVBENCH_THROW(std::runtime_error, "Invalid axis value spec: {}", value_spec); } } @@ -514,8 +490,8 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, this->update_int64_prop(first[0], first[1]); first += 2; } - else if (arg == "--min-time" || arg == "--max-noise" || - arg == "--skip-time" || arg == "--timeout") + else if (arg == "--min-time" || arg == "--max-noise" || arg == "--skip-time" || + arg == "--timeout") { check_params(1); this->update_float64_prop(first[0], first[1]); @@ -523,9 +499,7 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, } else { - NVBENCH_THROW(std::runtime_error, - "Unrecognized command-line argument: `{}`.", - arg); + NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg); } } } @@ -534,7 +508,7 @@ void option_parser::add_markdown_printer(const std::string &spec) try { std::ostream &stream = this->printer_spec_to_ostream(spec); - auto &printer = m_printer.emplace(stream, spec); + auto &printer = m_printer.emplace(stream, spec); if (spec == "stdout") { printer.set_color(m_color_md_stdout_printer); @@ -556,14 +530,10 @@ try } catch (std::exception &e) { - NVBENCH_THROW(std::runtime_error, - "Error while adding csv output for `{}`:\n{}", - spec, - e.what()); + NVBENCH_THROW(std::runtime_error, "Error while adding csv output for `{}`:\n{}", spec, e.what()); } -void option_parser::add_json_printer(const std::string &spec, - bool enable_binary) +void option_parser::add_json_printer(const std::string &spec, bool enable_binary) try { std::ostream &stream = this->printer_spec_to_ostream(spec); @@ -624,10 +594,7 @@ void option_parser::print_help() const fmt::print("{}\n{}\n", ::cli_help_text, ::cli_help_axis_text); } -void option_parser::print_help_axis() const -{ - fmt::print("{}\n", ::cli_help_axis_text); -} +void option_parser::print_help_axis() const { fmt::print("{}\n", ::cli_help_axis_text); } void option_parser::set_persistence_mode(const std::string &state) try @@ -685,9 +652,7 @@ try { if (rate_val == nvbench::device_info::clock_rate::none) { - fmt::print("Unlocking clocks for device '{}' ({}).\n", - device.get_name(), - device.get_id()); + fmt::print("Unlocking clocks for device '{}' ({}).\n", device.get_name(), device.get_id()); } else { @@ -757,16 +722,12 @@ try } catch (std::exception &e) { - NVBENCH_THROW(std::runtime_error, - "Error handling option --benchmark `{}`:\n{}", - name, - e.what()); + NVBENCH_THROW(std::runtime_error, "Error handling option --benchmark `{}`:\n{}", name, e.what()); } void option_parser::replay_global_args() { - this->parse_range(m_global_benchmark_args.cbegin(), - m_global_benchmark_args.cend()); + this->parse_range(m_global_benchmark_args.cbegin(), m_global_benchmark_args.cend()); } void option_parser::update_devices(const std::string &devices) @@ -790,10 +751,7 @@ try } catch (std::exception &e) { - NVBENCH_THROW(std::runtime_error, - "Error handling option --devices `{}`:\n{}", - devices, - e.what()); + NVBENCH_THROW(std::runtime_error, "Error handling option --devices `{}`:\n{}", devices, e.what()); } void option_parser::update_axis(const std::string &spec) @@ -832,28 +790,20 @@ try switch (axis.get_type()) { case axis_type::type: - this->update_type_axis(static_cast(axis), - values, - flags); + this->update_type_axis(static_cast(axis), values, flags); break; case axis_type::int64: - this->update_int64_axis(static_cast(axis), - values, - flags); + this->update_int64_axis(static_cast(axis), values, flags); break; case axis_type::float64: - this->update_float64_axis(static_cast(axis), - values, - flags); + this->update_float64_axis(static_cast(axis), values, flags); break; case axis_type::string: - this->update_string_axis(static_cast(axis), - values, - flags); + this->update_string_axis(static_cast(axis), values, flags); break; @@ -866,10 +816,7 @@ try } catch (std::exception &e) { - NVBENCH_THROW(std::runtime_error, - "Error handling option --axis `{}`:\n{}", - spec, - e.what()); + NVBENCH_THROW(std::runtime_error, "Error handling option --axis `{}`:\n{}", spec, e.what()); } void option_parser::update_int64_axis(int64_axis &axis, @@ -888,9 +835,7 @@ void option_parser::update_int64_axis(int64_axis &axis, } else { - NVBENCH_THROW(std::runtime_error, - "Invalid flag for int64 axis: `{}`", - flag_spec); + NVBENCH_THROW(std::runtime_error, "Invalid flag for int64 axis: `{}`", flag_spec); } auto input_values = parse_values(value_spec); @@ -905,9 +850,7 @@ void option_parser::update_float64_axis(float64_axis &axis, // Validate flags: if (!flag_spec.empty()) { - NVBENCH_THROW(std::runtime_error, - "Invalid flag for float64 axis: `{}`", - flag_spec); + NVBENCH_THROW(std::runtime_error, "Invalid flag for float64 axis: `{}`", flag_spec); } auto input_values = parse_values(value_spec); @@ -922,9 +865,7 @@ void option_parser::update_string_axis(string_axis &axis, // Validate flags: if (!flag_spec.empty()) { - NVBENCH_THROW(std::runtime_error, - "Invalid flag for string axis: `{}`", - flag_spec); + NVBENCH_THROW(std::runtime_error, "Invalid flag for string axis: `{}`", flag_spec); } auto input_values = parse_values(value_spec); @@ -939,9 +880,7 @@ void option_parser::update_type_axis(type_axis &axis, // Validate flags: if (!flag_spec.empty()) { - NVBENCH_THROW(std::runtime_error, - "Invalid flag for type axis: `{}`", - flag_spec); + NVBENCH_THROW(std::runtime_error, "Invalid flag for type axis: `{}`", flag_spec); } auto input_values = parse_values(value_spec); @@ -949,8 +888,7 @@ void option_parser::update_type_axis(type_axis &axis, axis.set_active_inputs(input_values); } -void option_parser::update_int64_prop(const std::string &prop_arg, - const std::string &prop_val) +void option_parser::update_int64_prop(const std::string &prop_arg, const std::string &prop_val) try { // If no active benchmark, save args as global. @@ -983,8 +921,7 @@ catch (std::exception &e) e.what()); } -void option_parser::update_float64_prop(const std::string &prop_arg, - const std::string &prop_val) +void option_parser::update_float64_prop(const std::string &prop_arg, const std::string &prop_val) try { // If no active benchmark, save args as global. diff --git a/nvbench/option_parser.cuh b/nvbench/option_parser.cuh index e35d724..c183764 100644 --- a/nvbench/option_parser.cuh +++ b/nvbench/option_parser.cuh @@ -41,8 +41,7 @@ struct type_axis; */ struct option_parser { - using benchmark_vector = - std::vector>; + using benchmark_vector = std::vector>; option_parser(); ~option_parser(); @@ -51,15 +50,9 @@ struct option_parser void parse(std::vector args); [[nodiscard]] benchmark_vector &get_benchmarks() { return m_benchmarks; }; - [[nodiscard]] const benchmark_vector &get_benchmarks() const - { - return m_benchmarks; - }; + [[nodiscard]] const benchmark_vector &get_benchmarks() const { return m_benchmarks; }; - [[nodiscard]] const std::vector &get_args() const - { - return m_args; - } + [[nodiscard]] const std::vector &get_args() const { return m_args; } /*! * Returns the output format requested by the parse options. @@ -115,10 +108,8 @@ private: std::string_view value_spec, std::string_view flag_spec); - void update_int64_prop(const std::string &prop_arg, - const std::string &prop_val); - void update_float64_prop(const std::string &prop_arg, - const std::string &prop_val); + void update_int64_prop(const std::string &prop_arg, const std::string &prop_val); + void update_float64_prop(const std::string &prop_arg, const std::string &prop_val); void update_used_device_state() const; diff --git a/nvbench/printer_base.cuh b/nvbench/printer_base.cuh index 0e28a35..3de8874 100644 --- a/nvbench/printer_base.cuh +++ b/nvbench/printer_base.cuh @@ -76,19 +76,16 @@ struct printer_base virtual ~printer_base(); // move-only - printer_base(const printer_base &) = delete; - printer_base(printer_base &&) = default; + printer_base(const printer_base &) = delete; + printer_base(printer_base &&) = default; printer_base &operator=(const printer_base &) = delete; - printer_base &operator=(printer_base &&) = default; + printer_base &operator=(printer_base &&) = default; /*! * Called once with the command line arguments used to invoke the current * executable. */ - void log_argv(const std::vector &argv) - { - this->do_log_argv(argv); - } + void log_argv(const std::vector &argv) { this->do_log_argv(argv); } /*! * Print a summary of all detected devices, if supported. @@ -108,19 +105,13 @@ struct printer_base /*! * Print a log message at the specified log level. */ - void log(nvbench::log_level level, const std::string &msg) - { - this->do_log(level, msg); - } + void log(nvbench::log_level level, const std::string &msg) { this->do_log(level, msg); } /*! * Called before running the measurements associated with state. * Implementations are expected to call `log(log_level::run, ...)`. */ - void log_run_state(const nvbench::state &exec_state) - { - this->do_log_run_state(exec_state); - } + void log_run_state(const nvbench::state &exec_state) { this->do_log_run_state(exec_state); } /*! * Measurements may call this to allow a printer to perform extra processing @@ -181,10 +172,7 @@ struct printer_base return this->do_get_completed_state_count(); } - virtual void set_total_state_count(std::size_t states) - { - this->do_set_total_state_count(states); - } + virtual void set_total_state_count(std::size_t states) { this->do_set_total_state_count(states); } [[nodiscard]] virtual std::size_t get_total_state_count() const { return this->do_get_total_state_count(); @@ -193,17 +181,16 @@ struct printer_base protected: // Implementation hooks for subclasses: - virtual void do_log_argv(const std::vector&) {} + virtual void do_log_argv(const std::vector &) {} virtual void do_print_device_info() {} virtual void do_print_log_preamble() {} virtual void do_print_log_epilogue() {} virtual void do_log(nvbench::log_level, const std::string &) {} virtual void do_log_run_state(const nvbench::state &) {} - virtual void - do_process_bulk_data_float64(nvbench::state &, - const std::string &, - const std::string &, - const std::vector &){}; + virtual void do_process_bulk_data_float64(nvbench::state &, + const std::string &, + const std::string &, + const std::vector &){}; virtual void do_print_benchmark_list(const benchmark_vector &) {} virtual void do_print_benchmark_results(const benchmark_vector &) {} diff --git a/nvbench/printer_base.cxx b/nvbench/printer_base.cxx index 66de795..639edc2 100644 --- a/nvbench/printer_base.cxx +++ b/nvbench/printer_base.cxx @@ -38,19 +38,10 @@ void printer_base::do_set_completed_state_count(std::size_t states) void printer_base::do_add_completed_state() { ++m_completed_state_count; } -std::size_t printer_base::do_get_completed_state_count() const -{ - return m_completed_state_count; -} +std::size_t printer_base::do_get_completed_state_count() const { return m_completed_state_count; } -void printer_base::do_set_total_state_count(std::size_t states) -{ - m_total_state_count = states; -} +void printer_base::do_set_total_state_count(std::size_t states) { m_total_state_count = states; } -std::size_t printer_base::do_get_total_state_count() const -{ - return m_total_state_count; -} +std::size_t printer_base::do_get_total_state_count() const { return m_total_state_count; } } // namespace nvbench diff --git a/nvbench/printer_multiplex.cuh b/nvbench/printer_multiplex.cuh index f32a0e9..797b480 100644 --- a/nvbench/printer_multiplex.cuh +++ b/nvbench/printer_multiplex.cuh @@ -40,10 +40,7 @@ struct printer_multiplex : nvbench::printer_base return static_cast(*m_printers.back()); } - [[nodiscard]] std::size_t get_printer_count() const - { - return m_printers.size(); - } + [[nodiscard]] std::size_t get_printer_count() const { return m_printers.size(); } protected: void do_log_argv(const std::vector &argv) override; @@ -52,11 +49,10 @@ protected: void do_print_log_epilogue() override; void do_log(nvbench::log_level, const std::string &) override; void do_log_run_state(const nvbench::state &) override; - void do_process_bulk_data_float64( - nvbench::state &, - const std::string &, - const std::string &, - const std::vector &) override; + void do_process_bulk_data_float64(nvbench::state &, + const std::string &, + const std::string &, + const std::vector &) override; void do_print_benchmark_list(const benchmark_vector &benches) override; void do_print_benchmark_results(const benchmark_vector &benches) override; void do_set_completed_state_count(std::size_t states) override; diff --git a/nvbench/printer_multiplex.cxx b/nvbench/printer_multiplex.cxx index 86d9954..89867c1 100644 --- a/nvbench/printer_multiplex.cxx +++ b/nvbench/printer_multiplex.cxx @@ -67,11 +67,10 @@ void printer_multiplex::do_log_run_state(const nvbench::state &exec_state) } } -void printer_multiplex::do_process_bulk_data_float64( - state &state, - const std::string &tag, - const std::string &hint, - const std::vector &data) +void printer_multiplex::do_process_bulk_data_float64(state &state, + const std::string &tag, + const std::string &hint, + const std::vector &data) { for (auto &format_ptr : m_printers) { @@ -87,8 +86,7 @@ void printer_multiplex::do_print_benchmark_list(const benchmark_vector &benches) } } -void printer_multiplex::do_print_benchmark_results( - const benchmark_vector &benches) +void printer_multiplex::do_print_benchmark_results(const benchmark_vector &benches) { for (auto &format_ptr : m_printers) { diff --git a/nvbench/range.cuh b/nvbench/range.cuh index f0e8255..7000f87 100644 --- a/nvbench/range.cuh +++ b/nvbench/range.cuh @@ -29,13 +29,11 @@ namespace nvbench namespace detail { template -using range_output_t = std::conditional_t, - nvbench::float64_t, - nvbench::int64_t>; +using range_output_t = + std::conditional_t, nvbench::float64_t, nvbench::int64_t>; } -template > +template > auto range(InT start, InT end, InT stride = InT{1}) { if constexpr (std::is_floating_point_v) diff --git a/nvbench/runner.cuh b/nvbench/runner.cuh index 9435906..f32b222 100644 --- a/nvbench/runner.cuh +++ b/nvbench/runner.cuh @@ -37,8 +37,7 @@ struct runner_base void generate_states(); - void handle_sampling_exception(const std::exception &e, - nvbench::state &exec_state) const; + void handle_sampling_exception(const std::exception &e, nvbench::state &exec_state) const; void run_state_prologue(state &exec_state) const; void run_state_epilogue(state &exec_state) const; @@ -51,11 +50,10 @@ struct runner_base template struct runner : public runner_base { - using benchmark_type = BenchmarkType; - using kernel_generator = typename benchmark_type::kernel_generator; - using type_configs = typename benchmark_type::type_configs; - static constexpr std::size_t num_type_configs = - benchmark_type::num_type_configs; + using benchmark_type = BenchmarkType; + using kernel_generator = typename benchmark_type::kernel_generator; + using type_configs = typename benchmark_type::type_configs; + static constexpr std::size_t num_type_configs = benchmark_type::num_type_configs; explicit runner(benchmark_type &bench) : runner_base{bench} @@ -86,38 +84,37 @@ private: // Iterate through type_configs: std::size_t type_config_index = 0; - nvbench::tl::foreach([&self = *this, - &states = m_benchmark.m_states, - &type_config_index, - &device](auto type_config_wrapper) { - // Get current type_config: - using type_config = typename decltype(type_config_wrapper)::type; + nvbench::tl::foreach( + [&self = *this, &states = m_benchmark.m_states, &type_config_index, &device]( + auto type_config_wrapper) { + // Get current type_config: + using type_config = typename decltype(type_config_wrapper)::type; - // Find states with the current device / type_config - for (nvbench::state &cur_state : states) - { - if (cur_state.get_device() == device && - cur_state.get_type_config_index() == type_config_index) + // Find states with the current device / type_config + for (nvbench::state &cur_state : states) { - self.run_state_prologue(cur_state); - try + if (cur_state.get_device() == device && + cur_state.get_type_config_index() == type_config_index) { - kernel_generator{}(cur_state, type_config{}); - if (cur_state.is_skipped()) + self.run_state_prologue(cur_state); + try { - self.print_skip_notification(cur_state); + kernel_generator{}(cur_state, type_config{}); + if (cur_state.is_skipped()) + { + self.print_skip_notification(cur_state); + } } + catch (std::exception &e) + { + self.handle_sampling_exception(e, cur_state); + } + self.run_state_epilogue(cur_state); } - catch (std::exception &e) - { - self.handle_sampling_exception(e, cur_state); - } - self.run_state_epilogue(cur_state); } - } - ++type_config_index; - }); + ++type_config_index; + }); } }; diff --git a/nvbench/runner.cxx b/nvbench/runner.cxx index 3aba964..93cedf5 100644 --- a/nvbench/runner.cxx +++ b/nvbench/runner.cxx @@ -35,8 +35,7 @@ void runner_base::generate_states() m_benchmark.m_states = nvbench::detail::state_generator::create(m_benchmark); } -void runner_base::handle_sampling_exception(const std::exception &e, - state &exec_state) const +void runner_base::handle_sampling_exception(const std::exception &e, state &exec_state) const { // If the state is skipped, that means the execution framework class handled // the error already. @@ -62,8 +61,7 @@ void runner_base::handle_sampling_exception(const std::exception &e, void runner_base::run_state_prologue(nvbench::state &exec_state) const { // Log if a printer exists: - if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); printer.log_run_state(exec_state); @@ -73,19 +71,16 @@ void runner_base::run_state_prologue(nvbench::state &exec_state) const void runner_base::run_state_epilogue(state &exec_state) const { // Notify the printer that the state has completed:: - if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); printer.add_completed_state(); } } - void runner_base::print_skip_notification(state &exec_state) const { - if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); printer.log(nvbench::log_level::skip, exec_state.get_skip_reason()); diff --git a/nvbench/state.cuh b/nvbench/state.cuh index 336ba2b..53c7413 100644 --- a/nvbench/state.cuh +++ b/nvbench/state.cuh @@ -58,106 +58,68 @@ struct state_tester; struct state { // move-only - state(const state &) = delete; - state(state &&) = default; + state(const state &) = delete; + state(state &&) = default; state &operator=(const state &) = delete; - state &operator=(state &&) = default; + state &operator=(state &&) = default; - [[nodiscard]] const nvbench::cuda_stream &get_cuda_stream() const - { - return m_cuda_stream; - } - void set_cuda_stream(nvbench::cuda_stream &&stream) - { - m_cuda_stream = std::move(stream); - } + [[nodiscard]] const nvbench::cuda_stream &get_cuda_stream() const { return m_cuda_stream; } + void set_cuda_stream(nvbench::cuda_stream &&stream) { m_cuda_stream = std::move(stream); } /// The CUDA device associated with with this benchmark state. May be /// nullopt for CPU-only benchmarks. - [[nodiscard]] const std::optional &get_device() const - { - return m_device; - } + [[nodiscard]] const std::optional &get_device() const { return m_device; } /// An index into a benchmark::type_configs type_list. Returns 0 if no type /// axes in the associated benchmark. - [[nodiscard]] std::size_t get_type_config_index() const - { - return m_type_config_index; - } + [[nodiscard]] std::size_t get_type_config_index() const { return m_type_config_index; } [[nodiscard]] nvbench::int64_t get_int64(const std::string &axis_name) const; - [[nodiscard]] nvbench::int64_t - get_int64_or_default(const std::string &axis_name, - nvbench::int64_t default_value) const; + [[nodiscard]] nvbench::int64_t get_int64_or_default(const std::string &axis_name, + nvbench::int64_t default_value) const; - [[nodiscard]] nvbench::float64_t - get_float64(const std::string &axis_name) const; - [[nodiscard]] nvbench::float64_t - get_float64_or_default(const std::string &axis_name, - nvbench::float64_t default_value) const; + [[nodiscard]] nvbench::float64_t get_float64(const std::string &axis_name) const; + [[nodiscard]] nvbench::float64_t get_float64_or_default(const std::string &axis_name, + nvbench::float64_t default_value) const; - [[nodiscard]] const std::string & - get_string(const std::string &axis_name) const; - [[nodiscard]] const std::string & - get_string_or_default(const std::string &axis_name, - const std::string &default_value) const; + [[nodiscard]] const std::string &get_string(const std::string &axis_name) const; + [[nodiscard]] const std::string &get_string_or_default(const std::string &axis_name, + const std::string &default_value) const; void add_element_count(std::size_t elements, std::string column_name = {}); void set_element_count(std::size_t elements) { m_element_count = elements; } - [[nodiscard]] std::size_t get_element_count() const - { - return m_element_count; - } + [[nodiscard]] std::size_t get_element_count() const { return m_element_count; } template void add_global_memory_reads(std::size_t count, std::string column_name = {}) { - this->add_global_memory_reads(count * sizeof(ElementType), - std::move(column_name)); + this->add_global_memory_reads(count * sizeof(ElementType), std::move(column_name)); } void add_global_memory_reads(std::size_t bytes, std::string column_name = {}); template void add_global_memory_writes(std::size_t count, std::string column_name = {}) { - this->add_global_memory_writes(count * sizeof(ElementType), - std::move(column_name)); + this->add_global_memory_writes(count * sizeof(ElementType), std::move(column_name)); } - void add_global_memory_writes(std::size_t bytes, - std::string column_name = {}); + void add_global_memory_writes(std::size_t bytes, std::string column_name = {}); void add_buffer_size(std::size_t num_bytes, std::string summary_tag, std::string column_name = {}, std::string description = {}); - void set_global_memory_rw_bytes(std::size_t bytes) - { - m_global_memory_rw_bytes = bytes; - } - [[nodiscard]] std::size_t get_global_memory_rw_bytes() const - { - return m_global_memory_rw_bytes; - } + void set_global_memory_rw_bytes(std::size_t bytes) { m_global_memory_rw_bytes = bytes; } + [[nodiscard]] std::size_t get_global_memory_rw_bytes() const { return m_global_memory_rw_bytes; } void skip(std::string reason) { m_skip_reason = std::move(reason); } [[nodiscard]] bool is_skipped() const { return !m_skip_reason.empty(); } - [[nodiscard]] const std::string &get_skip_reason() const - { - return m_skip_reason; - } + [[nodiscard]] const std::string &get_skip_reason() const { return m_skip_reason; } /// Execute at least this many trials per measurement. @{ - [[nodiscard]] nvbench::int64_t get_min_samples() const - { - return m_min_samples; - } - void set_min_samples(nvbench::int64_t min_samples) - { - m_min_samples = min_samples; - } + [[nodiscard]] nvbench::int64_t get_min_samples() const { return m_min_samples; } + void set_min_samples(nvbench::int64_t min_samples) { m_min_samples = min_samples; } /// @} /// If true, the benchmark is only run once, skipping all warmup runs and only @@ -222,20 +184,14 @@ struct state } ///@} - [[nodiscard]] const named_values &get_axis_values() const - { - return m_axis_values; - } + [[nodiscard]] const named_values &get_axis_values() const { return m_axis_values; } /*! * Return a string of "axis_name1=input_string1 axis_name2=input_string2 ..." */ [[nodiscard]] std::string get_axis_values_as_string(bool color = false) const; - [[nodiscard]] const benchmark_base &get_benchmark() const - { - return m_benchmark; - } + [[nodiscard]] const benchmark_base &get_benchmark() const { return m_benchmark; } void collect_l1_hit_rates() { m_collect_l1_hit_rates = true; } void collect_l2_hit_rates() { m_collect_l2_hit_rates = true; } @@ -252,26 +208,11 @@ struct state collect_dram_throughput(); } - [[nodiscard]] bool is_l1_hit_rate_collected() const - { - return m_collect_l1_hit_rates; - } - [[nodiscard]] bool is_l2_hit_rate_collected() const - { - return m_collect_l2_hit_rates; - } - [[nodiscard]] bool is_stores_efficiency_collected() const - { - return m_collect_stores_efficiency; - } - [[nodiscard]] bool is_loads_efficiency_collected() const - { - return m_collect_loads_efficiency; - } - [[nodiscard]] bool is_dram_throughput_collected() const - { - return m_collect_dram_throughput; - } + [[nodiscard]] bool is_l1_hit_rate_collected() const { return m_collect_l1_hit_rates; } + [[nodiscard]] bool is_l2_hit_rate_collected() const { return m_collect_l2_hit_rates; } + [[nodiscard]] bool is_stores_efficiency_collected() const { return m_collect_stores_efficiency; } + [[nodiscard]] bool is_loads_efficiency_collected() const { return m_collect_loads_efficiency; } + [[nodiscard]] bool is_dram_throughput_collected() const { return m_collect_dram_throughput; } [[nodiscard]] bool is_cupti_required() const { @@ -306,8 +247,7 @@ struct state template void exec(KernelLauncher &&kernel_launcher) { - this->exec(nvbench::exec_tag::none, - std::forward(kernel_launcher)); + this->exec(nvbench::exec_tag::none, std::forward(kernel_launcher)); } private: diff --git a/nvbench/state.cxx b/nvbench/state.cxx index 0774faa..d8e15ff 100644 --- a/nvbench/state.cxx +++ b/nvbench/state.cxx @@ -65,9 +65,8 @@ nvbench::int64_t state::get_int64(const std::string &axis_name) const return m_axis_values.get_int64(axis_name); } -nvbench::int64_t -state::get_int64_or_default(const std::string &axis_name, - nvbench::int64_t default_value) const +nvbench::int64_t state::get_int64_or_default(const std::string &axis_name, + nvbench::int64_t default_value) const try { return this->get_int64(axis_name); @@ -82,9 +81,8 @@ nvbench::float64_t state::get_float64(const std::string &axis_name) const return m_axis_values.get_float64(axis_name); } -nvbench::float64_t -state::get_float64_or_default(const std::string &axis_name, - nvbench::float64_t default_value) const +nvbench::float64_t state::get_float64_or_default(const std::string &axis_name, + nvbench::float64_t default_value) const try { return this->get_float64(axis_name); @@ -99,9 +97,8 @@ const std::string &state::get_string(const std::string &axis_name) const return m_axis_values.get_string(axis_name); } -const std::string & -state::get_string_or_default(const std::string &axis_name, - const std::string &default_value) const +const std::string &state::get_string_or_default(const std::string &axis_name, + const std::string &default_value) const try { return this->get_string(axis_name); @@ -125,20 +122,18 @@ summary &state::add_summary(summary s) const summary &state::get_summary(std::string_view tag) const { // Check tags first - auto iter = - std::find_if(m_summaries.cbegin(), - m_summaries.cend(), - [&tag](const auto &s) { return s.get_tag() == tag; }); + auto iter = std::find_if(m_summaries.cbegin(), m_summaries.cend(), [&tag](const auto &s) { + return s.get_tag() == tag; + }); if (iter != m_summaries.cend()) { return *iter; } // Then names: - iter = - std::find_if(m_summaries.cbegin(), - m_summaries.cend(), - [&tag](const auto &s) { return s.get_string("name") == tag; }); + iter = std::find_if(m_summaries.cbegin(), m_summaries.cend(), [&tag](const auto &s) { + return s.get_string("name") == tag; + }); if (iter != m_summaries.cend()) { return *iter; @@ -150,20 +145,18 @@ const summary &state::get_summary(std::string_view tag) const summary &state::get_summary(std::string_view tag) { // Check tags first - auto iter = - std::find_if(m_summaries.begin(), m_summaries.end(), [&tag](const auto &s) { - return s.get_tag() == tag; - }); + auto iter = std::find_if(m_summaries.begin(), m_summaries.end(), [&tag](const auto &s) { + return s.get_tag() == tag; + }); if (iter != m_summaries.end()) { return *iter; } // Then names: - iter = - std::find_if(m_summaries.begin(), m_summaries.end(), [&tag](const auto &s) { - return s.get_string("name") == tag; - }); + iter = std::find_if(m_summaries.begin(), m_summaries.end(), [&tag](const auto &s) { + return s.get_string("name") == tag; + }); if (iter != m_summaries.end()) { return *iter; @@ -187,18 +180,17 @@ std::string state::get_axis_values_as_string(bool color) const // Create a Key=Value list of all parameters: fmt::memory_buffer buffer; - auto append_key_value = [&buffer, &style](const std::string &key, - const auto &value, - std::string value_fmtstr = "{}") { - constexpr auto key_format = fmt::emphasis::italic; - constexpr auto value_format = fmt::emphasis::bold; + auto append_key_value = + [&buffer, &style](const std::string &key, const auto &value, std::string value_fmtstr = "{}") { + constexpr auto key_format = fmt::emphasis::italic; + constexpr auto value_format = fmt::emphasis::bold; - fmt::format_to(buffer, - "{}{}={}", - buffer.size() == 0 ? "" : " ", - fmt::format(style(key_format), "{}", key), - fmt::format(style(value_format), value_fmtstr, value)); - }; + fmt::format_to(buffer, + "{}{}={}", + buffer.size() == 0 ? "" : " ", + fmt::format(style(key_format), "{}", key), + fmt::format(style(value_format), value_fmtstr, value)); + }; if (m_device) { @@ -211,8 +203,7 @@ std::string state::get_axis_values_as_string(bool color) const const auto axis_type = m_axis_values.get_type(name); // Handle power-of-two int64 axes differently: - if (axis_type == named_values::type::int64 && - axes.get_int64_axis(name).is_power_of_two()) + if (axis_type == named_values::type::int64 && axes.get_int64_axis(name).is_power_of_two()) { const nvbench::int64_t value = m_axis_values.get_int64(name); const nvbench::int64_t exponent = int64_axis::compute_log2(value); @@ -242,10 +233,9 @@ std::string state::get_short_description(bool color) const return color ? fmt_style : no_style; }; - return fmt::format( - "{} [{}]", - fmt::format(style(fmt::emphasis::bold), "{}", m_benchmark.get().get_name()), - this->get_axis_values_as_string(color)); + return fmt::format("{} [{}]", + fmt::format(style(fmt::emphasis::bold), "{}", m_benchmark.get().get_name()), + this->get_axis_values_as_string(color)); } void state::add_element_count(std::size_t elements, std::string column_name) diff --git a/nvbench/string_axis.cuh b/nvbench/string_axis.cuh index 2f526e7..a4e8c62 100644 --- a/nvbench/string_axis.cuh +++ b/nvbench/string_axis.cuh @@ -36,25 +36,13 @@ struct string_axis final : public axis_base ~string_axis() final; - void set_inputs(std::vector inputs) - { - m_values = std::move(inputs); - } - [[nodiscard]] const std::string &get_value(std::size_t i) const - { - return m_values[i]; - } + void set_inputs(std::vector inputs) { m_values = std::move(inputs); } + [[nodiscard]] const std::string &get_value(std::size_t i) const { return m_values[i]; } private: - std::unique_ptr do_clone() const - { - return std::make_unique(*this); - } + std::unique_ptr do_clone() const { return std::make_unique(*this); } std::size_t do_get_size() const final { return m_values.size(); } - std::string do_get_input_string(std::size_t i) const final - { - return m_values[i]; - } + std::string do_get_input_string(std::size_t i) const final { return m_values[i]; } std::string do_get_description(std::size_t) const final { return {}; } std::vector m_values; diff --git a/nvbench/summary.cuh b/nvbench/summary.cuh index 4576b15..66093c0 100644 --- a/nvbench/summary.cuh +++ b/nvbench/summary.cuh @@ -92,10 +92,10 @@ struct summary : public nvbench::named_values {} // move-only - summary(const summary &) = delete; - summary(summary &&) = default; + summary(const summary &) = delete; + summary(summary &&) = default; summary &operator=(const summary &) = delete; - summary &operator=(summary &&) = default; + summary &operator=(summary &&) = default; void set_tag(std::string tag) { m_tag = std::move(tag); } [[nodiscard]] const std::string &get_tag() const { return m_tag; } diff --git a/nvbench/test_kernels.cuh b/nvbench/test_kernels.cuh index e08db31..f01305c 100644 --- a/nvbench/test_kernels.cuh +++ b/nvbench/test_kernels.cuh @@ -38,8 +38,8 @@ namespace nvbench __global__ void sleep_kernel(double seconds) { const auto start = cuda::std::chrono::high_resolution_clock::now(); - const auto ns = cuda::std::chrono::nanoseconds( - static_cast(seconds * 1000 * 1000 * 1000)); + const auto ns = + cuda::std::chrono::nanoseconds(static_cast(seconds * 1000 * 1000 * 1000)); const auto finish = start + ns; auto now = cuda::std::chrono::high_resolution_clock::now(); @@ -53,7 +53,7 @@ __global__ void sleep_kernel(double seconds) * Naive copy of `n` values from `in` -> `out`. */ template -__global__ void copy_kernel(const T* in, U* out, std::size_t n) +__global__ void copy_kernel(const T *in, U *out, std::size_t n) { const auto init = blockIdx.x * blockDim.x + threadIdx.x; const auto step = blockDim.x * gridDim.x; @@ -68,7 +68,7 @@ __global__ void copy_kernel(const T* in, U* out, std::size_t n) * For `i <- [0,n)`, `out[i] = in[i] % 2`. */ template -__global__ void mod2_kernel(const T* in, U* out, std::size_t n) +__global__ void mod2_kernel(const T *in, U *out, std::size_t n) { const auto init = blockIdx.x * blockDim.x + threadIdx.x; const auto step = blockDim.x * gridDim.x; @@ -79,4 +79,4 @@ __global__ void mod2_kernel(const T* in, U* out, std::size_t n) } } -} +} // namespace nvbench diff --git a/nvbench/type_axis.cuh b/nvbench/type_axis.cuh index 2ee9144..27c1cc1 100644 --- a/nvbench/type_axis.cuh +++ b/nvbench/type_axis.cuh @@ -43,7 +43,7 @@ struct type_axis final : public axis_base template void set_inputs(); - void set_active_inputs(const std::vector& inputs); + void set_active_inputs(const std::vector &inputs); [[nodiscard]] bool get_is_active(const std::string &input) const; [[nodiscard]] bool get_is_active(std::size_t index) const; @@ -57,23 +57,13 @@ struct type_axis final : public axis_base /** * The index in this axis of the type with the specified `input_string`. */ - [[nodiscard]] std::size_t - get_type_index(const std::string &input_string) const; + [[nodiscard]] std::size_t get_type_index(const std::string &input_string) const; private: - std::unique_ptr do_clone() const - { - return std::make_unique(*this); - } + std::unique_ptr do_clone() const { return std::make_unique(*this); } std::size_t do_get_size() const final { return m_input_strings.size(); } - std::string do_get_input_string(std::size_t i) const final - { - return m_input_strings[i]; - } - std::string do_get_description(std::size_t i) const final - { - return m_descriptions[i]; - } + std::string do_get_input_string(std::size_t i) const final { return m_input_strings[i]; } + std::string do_get_description(std::size_t i) const final { return m_descriptions[i]; } std::vector m_input_strings; std::vector m_descriptions; diff --git a/nvbench/type_axis.cxx b/nvbench/type_axis.cxx index af436ad..e678ff9 100644 --- a/nvbench/type_axis.cxx +++ b/nvbench/type_axis.cxx @@ -35,10 +35,10 @@ void type_axis::set_active_inputs(const std::vector &inputs) { m_mask.clear(); m_mask.resize(m_input_strings.size(), false); - for (const auto& input : inputs) + for (const auto &input : inputs) { const auto idx = this->get_type_index(input); - m_mask[idx] = true; + m_mask[idx] = true; } } @@ -47,21 +47,16 @@ bool type_axis::get_is_active(const std::string &input) const return this->get_is_active(this->get_type_index(input)); } -bool type_axis::get_is_active(std::size_t idx) const -{ - return m_mask.at(idx); -} +bool type_axis::get_is_active(std::size_t idx) const { return m_mask.at(idx); } std::size_t type_axis::get_active_count() const { - return static_cast( - std::count(m_mask.cbegin(), m_mask.cend(), true)); + return static_cast(std::count(m_mask.cbegin(), m_mask.cend(), true)); } std::size_t type_axis::get_type_index(const std::string &input_string) const { - auto it = - std::find(m_input_strings.cbegin(), m_input_strings.cend(), input_string); + auto it = std::find(m_input_strings.cbegin(), m_input_strings.cend(), input_string); if (it == m_input_strings.end()) { NVBENCH_THROW(std::runtime_error, diff --git a/nvbench/type_strings.cuh b/nvbench/type_strings.cuh index 287e0f9..b915854 100644 --- a/nvbench/type_strings.cuh +++ b/nvbench/type_strings.cuh @@ -30,17 +30,17 @@ namespace nvbench std::string demangle(const std::string &str); template -std::string demangle() { return demangle(typeid(T).name()); } +std::string demangle() +{ + return demangle(typeid(T).name()); +} template struct type_strings { // The string used to identify the type in shorthand (e.g. output tables and // CLI options): - static std::string input_string() - { - return nvbench::demangle(); - } + static std::string input_string() { return nvbench::demangle(); } // A more descriptive identifier for the type, if input_string is not a common // identifier. May be blank if `input_string` is obvious. @@ -56,10 +56,7 @@ struct type_strings> // A more descriptive identifier for the type, if input_string is not a common // identifier. May be blank if `input_string` is obvious. - static std::string description() - { - return nvbench::demangle>(); - } + static std::string description() { return nvbench::demangle>(); } }; } // namespace nvbench @@ -67,15 +64,15 @@ struct type_strings> /*! * Declare an `input_string` and `description` to use with a specific `type`. */ -#define NVBENCH_DECLARE_TYPE_STRINGS(Type, InputString, Description) \ - namespace nvbench \ - { \ - template <> \ - struct type_strings \ - { \ - static std::string input_string() { return {InputString}; } \ - static std::string description() { return {Description}; } \ - }; \ +#define NVBENCH_DECLARE_TYPE_STRINGS(Type, InputString, Description) \ + namespace nvbench \ + { \ + template <> \ + struct type_strings \ + { \ + static std::string input_string() { return {InputString}; } \ + static std::string description() { return {Description}; } \ + }; \ } NVBENCH_DECLARE_TYPE_STRINGS(nvbench::int8_t, "I8", "int8_t");