diff --git a/.clang-format b/.clang-format index e593f29..61bfc27 100644 --- a/.clang-format +++ b/.clang-format @@ -41,16 +41,28 @@ CompactNamespaces: false ContinuationIndentWidth: 2 IncludeBlocks: Regroup IncludeCategories: - - Regex: '^$' + - Regex: '^$' + Priority: 11 IndentCaseLabels: true IndentPPDirectives: None IndentWidth: 2 diff --git a/.devcontainer/README.md b/.devcontainer/README.md index e84b5f3..c359cc2 100644 --- a/.devcontainer/README.md +++ b/.devcontainer/README.md @@ -34,7 +34,7 @@ CCCL uses [Development Containers](https://containers.dev/) to provide consisten - Alternatively, use the Command Palette to start a Dev Container. Press `Ctrl+Shift+P` to open the Command Palette. Type "Remote-Containers: Reopen in Container" and select it. - ![Shows "Reopen in Container" in command pallete.](./img/open_in_container_manual.png) + ![Shows "Reopen in Container" in command palette.](./img/open_in_container_manual.png) 4. Select an environment with the desired CTK and host compiler from the list: @@ -136,7 +136,7 @@ For more information, see the `.devcontainer/make_devcontainers.sh --help` messa 2. Install WSL 2 by running: ```bash -wsl --install +wsl --install ``` This should probably install Ubuntu distro as a default. @@ -182,14 +182,14 @@ then run `sudo systemctl restart docker.service`. 10. Open the CCCL cloned repo in VS Code ( `Ctrl + Shift + P `, select `File: Open Folder...` and select the path where your CCCL clone is located). 11. If prompted, choose `Reopen in Container`. - + - If you are not prompted just type `Ctrl + Shift + P` and `Dev Containers: Open Folder in Container ...`. 12. Verify that Dev Container was configured properly by running `nvidia-smi` in your Dev Container terminal. For a proper configuration it is important for the steps in [Install prerequisites and VS Code extensions](#prereqs) to be followed in a precise order. From that point on, the guide aligns with our [existing Dev Containers native Linux guide](https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md) with just one minor potential alteration: -13. If WSL was launched without the X-server enabled, when asked to "authenticate Git with your Github credentials", if you answer **Yes**, the browser might not open automatically, with the following error message. +13. If WSL was launched without the X-server enabled, when asked to "authenticate Git with your Github credentials", if you answer **Yes**, the browser might not open automatically, with the following error message. > Failed opening a web browser at https://github.com/login/device exec: "xdg-open,x-www-browser,www-browser,wslview": executable file not found in $PATH diff --git a/.devcontainer/launch.sh b/.devcontainer/launch.sh index a9ef143..dab6340 100755 --- a/.devcontainer/launch.sh +++ b/.devcontainer/launch.sh @@ -304,4 +304,3 @@ main() { } main "$@" - diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 1113040..4306215 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -14,4 +14,4 @@ # # Only add commits that are pure formatting changes (e.g. clang-format version changes, etc). 8f1152d4a22287a35be2dde596e3cf86ace8054a # Increase column limit to 100 - +3440855dbd405db614861885ad1577fffd882867 # Initial addition of pre-commit.ci formatting. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..8ba3419 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,70 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +ci: + autofix_commit_msg: | + [pre-commit.ci] auto code formatting + autofix_prs: false + autoupdate_branch: '' + autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' + autoupdate_schedule: quarterly + skip: [] + submodules: false + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: end-of-file-fixer + - id: mixed-line-ending + - id: trailing-whitespace + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v19.1.6 + hooks: + - id: clang-format + types_or: [file] + files: | + (?x)^( + ^.*\.c$| + ^.*\.cpp$| + ^.*\.cu$| + ^.*\.cuh$| + ^.*\.cxx$| + ^.*\.h$| + ^.*\.hpp$| + ^.*\.inl$| + ^.*\.mm$ + ) + args: ["-fallback-style=none", "-style=file", "-i"] + + # TODO/REMINDER: add the Ruff vscode extension to the devcontainers + # Ruff, the Python auto-correcting linter/formatter written in Rust + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.6 + hooks: + - id: ruff # linter + - id: ruff-format # formatter + + # TOML lint & format + - repo: https://github.com/ComPWA/taplo-pre-commit + rev: v0.9.3 + hooks: + # See https://github.com/NVIDIA/cccl/issues/3426 + # - id: taplo-lint + # exclude: "^docs/" + - id: taplo-format + exclude: "^docs/" + + - repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + additional_dependencies: [tomli] + args: ["--toml", "pyproject.toml"] + exclude: | + (?x)^( + build| + CITATION.md + ) + + +default_language_version: + python: python3 diff --git a/ci/ninja_summary.py b/ci/ninja_summary.py index f496db5..526a58b 100755 --- a/ci/ninja_summary.py +++ b/ci/ninja_summary.py @@ -65,6 +65,7 @@ long_ext_count = 10 class Target: """Represents a single line read for a .ninja_log file.""" + def __init__(self, start, end): """Creates a target object by passing in the start/end times in seconds as a float.""" @@ -94,9 +95,9 @@ class Target: """ # Allow for modest floating-point errors epsilon = 0.000002 - if (self.weighted_duration > self.Duration() + epsilon): - print('%s > %s?' % (self.weighted_duration, self.Duration())) - assert (self.weighted_duration <= self.Duration() + epsilon) + if self.weighted_duration > self.Duration() + epsilon: + print("%s > %s?" % (self.weighted_duration, self.Duration())) + assert self.weighted_duration <= self.Duration() + epsilon return self.weighted_duration def DescribeTargets(self): @@ -104,10 +105,10 @@ class Target: # Some build steps generate dozens of outputs - handle them sanely. # The max_length was chosen so that it can fit most of the long # single-target names, while minimizing word wrapping. - result = ', '.join(self.targets) + result = ", ".join(self.targets) max_length = 65 if len(result) > max_length: - result = result[:max_length] + '...' + result = result[:max_length] + "..." return result @@ -121,12 +122,11 @@ def ReadTargets(log, show_all): # targets. if not header: return [] - assert header == '# ninja log v5\n', \ - 'unrecognized ninja log version %r' % header + assert header == "# ninja log v5\n", "unrecognized ninja log version %r" % header targets_dict = {} last_end_seen = 0.0 for line in log: - parts = line.strip().split('\t') + parts = line.strip().split("\t") if len(parts) != 5: # If ninja.exe is rudely halted then the .ninja_log file may be # corrupt. Silently continue. @@ -165,17 +165,17 @@ def ReadTargets(log, show_all): def GetExtension(target, extra_patterns): """Return the file extension that best represents a target. - For targets that generate multiple outputs it is important to return a - consistent 'canonical' extension. Ultimately the goal is to group build steps - by type.""" + For targets that generate multiple outputs it is important to return a + consistent 'canonical' extension. Ultimately the goal is to group build steps + by type.""" for output in target.targets: if extra_patterns: - for fn_pattern in extra_patterns.split(';'): - if fnmatch.fnmatch(output, '*' + fn_pattern + '*'): + for fn_pattern in extra_patterns.split(";"): + if fnmatch.fnmatch(output, "*" + fn_pattern + "*"): return fn_pattern # Not a true extension, but a good grouping. - if output.endswith('type_mappings'): - extension = 'type_mappings' + if output.endswith("type_mappings"): + extension = "type_mappings" break # Capture two extensions if present. For example: file.javac.jar should @@ -185,26 +185,26 @@ def GetExtension(target, extra_patterns): extension = ext2 + ext1 # Preserve the order in the file name. if len(extension) == 0: - extension = '(no extension found)' + extension = "(no extension found)" - if ext1 in ['.pdb', '.dll', '.exe']: - extension = 'PEFile (linking)' + if ext1 in [".pdb", ".dll", ".exe"]: + extension = "PEFile (linking)" # Make sure that .dll and .exe are grouped together and that the # .dll.lib files don't cause these to be listed as libraries break - if ext1 in ['.so', '.TOC']: - extension = '.so (linking)' + if ext1 in [".so", ".TOC"]: + extension = ".so (linking)" # Attempt to identify linking, avoid identifying as '.TOC' break # Make sure .obj files don't get categorized as mojo files - if ext1 in ['.obj', '.o']: + if ext1 in [".obj", ".o"]: break # Jars are the canonical output of java targets. - if ext1 == '.jar': + if ext1 == ".jar": break # Normalize all mojo related outputs to 'mojo'. - if output.count('.mojom') > 0: - extension = 'mojo' + if output.count(".mojom") > 0: + extension = "mojo" break return extension @@ -229,8 +229,8 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting): if target.end > latest: latest = target.end total_cpu_time += target.Duration() - task_start_stop_times.append((target.start, 'start', target)) - task_start_stop_times.append((target.end, 'stop', target)) + task_start_stop_times.append((target.start, "start", target)) + task_start_stop_times.append((target.end, "stop", target)) length = latest - earliest weighted_total = 0.0 @@ -256,10 +256,10 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting): if num_running > 0: # Update the total weighted time up to this moment. last_weighted_time += (time - last_time) / float(num_running) - if action_name == 'start': + if action_name == "start": # Record the total weighted task time when this task starts. running_tasks[target] = last_weighted_time - if action_name == 'stop': + if action_name == "stop": # Record the change in the total weighted task time while this task # ran. weighted_duration = last_weighted_time - running_tasks[target] @@ -267,24 +267,27 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting): weighted_total += weighted_duration del running_tasks[target] last_time = time - assert (len(running_tasks) == 0) + assert len(running_tasks) == 0 # Warn if the sum of weighted times is off by more than half a second. if abs(length - weighted_total) > 500: - print('Warning: Possible corrupt ninja log, results may be ' - 'untrustworthy. Length = %.3f, weighted total = %.3f' % - (length, weighted_total)) + print( + "Warning: Possible corrupt ninja log, results may be " + "untrustworthy. Length = %.3f, weighted total = %.3f" + % (length, weighted_total) + ) # Print the slowest build steps: - print(' Longest build steps:') + print(" Longest build steps:") if elapsed_time_sorting: entries.sort(key=lambda x: x.Duration()) else: entries.sort(key=lambda x: x.WeightedDuration()) for target in entries[-long_count:]: - print(' %8.1f weighted s to build %s (%.1f s elapsed time)' % - (target.WeightedDuration(), target.DescribeTargets(), - target.Duration())) + print( + " %8.1f weighted s to build %s (%.1f s elapsed time)" + % (target.WeightedDuration(), target.DescribeTargets(), target.Duration()) + ) # Sum up the time by file extension/type of the output file count_by_ext = {} @@ -293,51 +296,56 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting): # Scan through all of the targets to build up per-extension statistics. for target in entries: extension = GetExtension(target, extra_step_types) - time_by_ext[extension] = time_by_ext.get(extension, - 0) + target.Duration() - weighted_time_by_ext[extension] = weighted_time_by_ext.get( - extension, 0) + target.WeightedDuration() + time_by_ext[extension] = time_by_ext.get(extension, 0) + target.Duration() + weighted_time_by_ext[extension] = ( + weighted_time_by_ext.get(extension, 0) + target.WeightedDuration() + ) count_by_ext[extension] = count_by_ext.get(extension, 0) + 1 - print(' Time by build-step type:') + print(" Time by build-step type:") # Copy to a list with extension name and total time swapped, to (time, ext) if elapsed_time_sorting: - weighted_time_by_ext_sorted = sorted( - (y, x) for (x, y) in time_by_ext.items()) + weighted_time_by_ext_sorted = sorted((y, x) for (x, y) in time_by_ext.items()) else: weighted_time_by_ext_sorted = sorted( - (y, x) for (x, y) in weighted_time_by_ext.items()) + (y, x) for (x, y) in weighted_time_by_ext.items() + ) # Print the slowest build target types: for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]: print( - ' %8.1f s weighted time to generate %d %s files ' - '(%1.1f s elapsed time sum)' % - (time, count_by_ext[extension], extension, time_by_ext[extension])) + " %8.1f s weighted time to generate %d %s files " + "(%1.1f s elapsed time sum)" + % (time, count_by_ext[extension], extension, time_by_ext[extension]) + ) - print(' %.1f s weighted time (%.1f s elapsed time sum, %1.1fx ' - 'parallelism)' % - (length, total_cpu_time, total_cpu_time * 1.0 / length)) - print(' %d build steps completed, average of %1.2f/s' % - (len(entries), len(entries) / (length))) + print( + " %.1f s weighted time (%.1f s elapsed time sum, %1.1fx " + "parallelism)" % (length, total_cpu_time, total_cpu_time * 1.0 / length) + ) + print( + " %d build steps completed, average of %1.2f/s" + % (len(entries), len(entries) / (length)) + ) def main(): - log_file = '.ninja_log' - metrics_file = 'siso_metrics.json' + log_file = ".ninja_log" + metrics_file = "siso_metrics.json" parser = argparse.ArgumentParser() - parser.add_argument('-C', dest='build_directory', help='Build directory.') + parser.add_argument("-C", dest="build_directory", help="Build directory.") parser.add_argument( - '-s', - '--step-types', - help='semicolon separated fnmatch patterns for build-step grouping') + "-s", + "--step-types", + help="semicolon separated fnmatch patterns for build-step grouping", + ) parser.add_argument( - '-e', - '--elapsed_time_sorting', + "-e", + "--elapsed_time_sorting", default=False, - action='store_true', - help='Sort output by elapsed time instead of weighted time') - parser.add_argument('--log-file', - help="specific ninja log file to analyze.") + action="store_true", + help="Sort output by elapsed time instead of weighted time", + ) + parser.add_argument("--log-file", help="specific ninja log file to analyze.") args, _extra_args = parser.parse_known_args() if args.build_directory: log_file = os.path.join(args.build_directory, log_file) @@ -348,34 +356,35 @@ def main(): # Offer a convenient way to add extra step types automatically, # including when this script is run by autoninja. get() returns None if # the variable isn't set. - args.step_types = os.environ.get('chromium_step_types') + args.step_types = os.environ.get("chromium_step_types") if args.step_types: # Make room for the extra build types. global long_ext_count - long_ext_count += len(args.step_types.split(';')) + long_ext_count += len(args.step_types.split(";")) if os.path.exists(metrics_file): # Automatically handle summarizing siso builds. - cmd = ['siso.bat' if 'win32' in sys.platform else 'siso'] - cmd.extend(['metrics', 'summary']) + cmd = ["siso.bat" if "win32" in sys.platform else "siso"] + cmd.extend(["metrics", "summary"]) if args.build_directory: - cmd.extend(['-C', args.build_directory]) + cmd.extend(["-C", args.build_directory]) if args.step_types: - cmd.extend(['--step_types', args.step_types]) + cmd.extend(["--step_types", args.step_types]) if args.elapsed_time_sorting: - cmd.append('--elapsed_time_sorting') + cmd.append("--elapsed_time_sorting") subprocess.run(cmd) else: try: - with open(log_file, 'r') as log: + with open(log_file, "r") as log: entries = ReadTargets(log, False) if entries: - SummarizeEntries(entries, args.step_types, - args.elapsed_time_sorting) + SummarizeEntries( + entries, args.step_types, args.elapsed_time_sorting + ) except IOError: - print('Log file %r not found, no build summary created.' % log_file) + print("Log file %r not found, no build summary created." % log_file) return errno.ENOENT -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/ci/sccache_hit_rate.sh b/ci/sccache_hit_rate.sh index de8ae46..3284aa4 100755 --- a/ci/sccache_hit_rate.sh +++ b/ci/sccache_hit_rate.sh @@ -37,5 +37,5 @@ if [ $requests_diff -eq 0 ]; then else hit_rate=$(awk -v hits=$hits_diff -v requests=$requests_diff 'BEGIN {printf "%.2f", hits/requests * 100}') echo "sccache hit rate: $hit_rate%" >&2 - echo "$hit_rate" + echo "$hit_rate" fi diff --git a/cmake/DetectSupportedStandards.cmake b/cmake/DetectSupportedStandards.cmake index 6a86d6a..7dd186a 100644 --- a/cmake/DetectSupportedStandards.cmake +++ b/cmake/DetectSupportedStandards.cmake @@ -1,4 +1,4 @@ -# Detect the langauge standards supported by the current compilers. +# Detect the language standards supported by the current compilers. # # Usage: detect_supported_cxx_standards( ) # diff --git a/cmake/NVBenchUtilities.cmake b/cmake/NVBenchUtilities.cmake index caa79b8..ecef18e 100644 --- a/cmake/NVBenchUtilities.cmake +++ b/cmake/NVBenchUtilities.cmake @@ -14,7 +14,7 @@ # limitations under the License. # Passes all args directly to execute_process while setting up the following -# results variables and propogating them to the caller's scope: +# results variables and propagating them to the caller's scope: # # - nvbench_process_exit_code # - nvbench_process_stdout diff --git a/cmake/header_test.in.cxx b/cmake/header_test.in.cxx index 8772c83..3cf2897 100644 --- a/cmake/header_test.in.cxx +++ b/cmake/header_test.in.cxx @@ -9,9 +9,9 @@ // a potential macro collision and halts. // // Hacky way to build a string, but it works on all tested platforms. -#define NVBench_MACRO_CHECK(MACRO, HEADER) \ - NVBench_MACRO_CHECK_IMPL(Identifier MACRO should not be used from NVBench \ - headers due to conflicts with HEADER macros.) +#define NVBench_MACRO_CHECK(MACRO, HEADER) \ + NVBench_MACRO_CHECK_IMPL( \ + Identifier MACRO should not be used from NVBench headers due to conflicts with HEADER macros.) // Use raw platform checks instead of the NVBench_HOST_COMPILER macros since we // don't want to #include any headers other than the one being tested. @@ -34,8 +34,8 @@ // library implementations unconditionally `#undef` these macros, which then // causes random failures later. // Leaving these commented out as a warning: Here be dragons. -//#define min(...) NVBench_MACRO_CHECK('min', windows.h) -//#define max(...) NVBench_MACRO_CHECK('max', windows.h) +// #define min(...) NVBench_MACRO_CHECK('min', windows.h) +// #define max(...) NVBench_MACRO_CHECK('max', windows.h) // termios.h conflicts (NVIDIA/thrust#1547) #define B0 NVBench_MACRO_CHECK("B0", termios.h) diff --git a/docs/cli_help.md b/docs/cli_help.md index 424c1be..d6af964 100644 --- a/docs/cli_help.md +++ b/docs/cli_help.md @@ -90,7 +90,7 @@ before any `--benchmark` arguments. * `--stopping-criterion ` - * After `--min-samples` is satisfied, use `` to detect if enough + * After `--min-samples` is satisfied, use `` to detect if enough samples were collected. * Only applies to Cold measurements. * Default is stdrel (`--stopping-criterion stdrel`) diff --git a/examples/auto_throughput.cu b/examples/auto_throughput.cu index 14b6b94..6ce641a 100644 --- a/examples/auto_throughput.cu +++ b/examples/auto_throughput.cu @@ -24,37 +24,33 @@ template __global__ void kernel(std::size_t stride, std::size_t elements, - const nvbench::int32_t * __restrict__ in, + const nvbench::int32_t *__restrict__ in, nvbench::int32_t *__restrict__ out) { - const std::size_t tid = threadIdx.x + blockIdx.x * blockDim.x; + const std::size_t tid = threadIdx.x + blockIdx.x * blockDim.x; const std::size_t step = gridDim.x * blockDim.x; - for (std::size_t i = stride * tid; - i < stride * elements; - i += stride * step) + for (std::size_t i = stride * tid; i < stride * elements; i += stride * step) { for (int j = 0; j < ItemsPerThread; j++) { - const auto read_id = (ItemsPerThread * i + j) % elements; + const auto read_id = (ItemsPerThread * i + j) % elements; const auto write_id = tid + j * elements; - out[write_id] = in[read_id]; + out[write_id] = in[read_id]; } } } - // `throughput_bench` copies a 128 MiB buffer of int32_t, and reports throughput // and cache hit rates. // // Calling state.collect_*() enables particular metric collection if nvbench // was build with CUPTI support (CMake option: -DNVBench_ENABLE_CUPTI=ON). template -void throughput_bench(nvbench::state &state, - nvbench::type_list>) +void throughput_bench(nvbench::state &state, nvbench::type_list>) { // Allocate input data: - const std::size_t stride = static_cast(state.get_int64("Stride")); + const std::size_t stride = static_cast(state.get_int64("Stride")); const std::size_t elements = 128 * 1024 * 1024 / sizeof(nvbench::int32_t); thrust::device_vector input(elements); thrust::device_vector output(elements * ItemsPerThread); @@ -72,12 +68,11 @@ void throughput_bench(nvbench::state &state, static_cast((elements + threads_in_block - 1) / threads_in_block); state.exec([&](nvbench::launch &launch) { - kernel - <<>>( - stride, - elements, - thrust::raw_pointer_cast(input.data()), - thrust::raw_pointer_cast(output.data())); + kernel<<>>( + stride, + elements, + thrust::raw_pointer_cast(input.data()), + thrust::raw_pointer_cast(output.data())); }); } diff --git a/examples/axes.cu b/examples/axes.cu index 44ae598..59139b5 100644 --- a/examples/axes.cu +++ b/examples/axes.cu @@ -71,18 +71,16 @@ void copy_sweep_grid_shape(nvbench::state &state) thrust::device_vector in(num_values, 0); thrust::device_vector out(num_values, 0); - state.exec( - [block_size, - num_blocks, - num_values, - in_ptr = thrust::raw_pointer_cast(in.data()), - out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { - (void) num_values; // clang thinks this is unused... - nvbench::copy_kernel<<>>( - in_ptr, - out_ptr, - num_values); - }); + state.exec([block_size, + num_blocks, + num_values, + in_ptr = thrust::raw_pointer_cast(in.data()), + out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { + (void)num_values; // clang thinks this is unused... + nvbench::copy_kernel<<>>(in_ptr, + out_ptr, + num_values); + }); } NVBENCH_BENCH(copy_sweep_grid_shape) // Every second power of two from 64->1024: @@ -107,15 +105,12 @@ void copy_type_sweep(nvbench::state &state, nvbench::type_list) thrust::device_vector in(num_values, 0); thrust::device_vector out(num_values, 0); - state.exec( - [num_values, - in_ptr = thrust::raw_pointer_cast(in.data()), - out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { - (void) num_values; // clang thinks this is unused... - nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, - out_ptr, - num_values); - }); + state.exec([num_values, + in_ptr = thrust::raw_pointer_cast(in.data()), + out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { + (void)num_values; // clang thinks this is unused... + nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values); + }); } // Define a type_list to use for the type axis: using cts_types = nvbench::type_list -void copy_type_conversion_sweep(nvbench::state &state, - nvbench::type_list) +void copy_type_conversion_sweep(nvbench::state &state, nvbench::type_list) { // Optional: Skip narrowing conversions. - if constexpr(sizeof(InputType) > sizeof(OutputType)) + if constexpr (sizeof(InputType) > sizeof(OutputType)) { state.skip("Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."); return; @@ -154,15 +148,12 @@ void copy_type_conversion_sweep(nvbench::state &state, thrust::device_vector in(num_values, 0); thrust::device_vector out(num_values, 0); - state.exec( - [num_values, - in_ptr = thrust::raw_pointer_cast(in.data()), - out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { - (void) num_values; // clang thinks this is unused... - nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, - out_ptr, - num_values); - }); + state.exec([num_values, + in_ptr = thrust::raw_pointer_cast(in.data()), + out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { + (void)num_values; // clang thinks this is unused... + nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values); + }); } // Optional: Skip when InputType == OutputType. This approach avoids // instantiating the benchmark at all. @@ -178,6 +169,5 @@ using ctcs_types = nvbench::type_list; -NVBENCH_BENCH_TYPES(copy_type_conversion_sweep, - NVBENCH_TYPE_AXES(ctcs_types, ctcs_types)) +NVBENCH_BENCH_TYPES(copy_type_conversion_sweep, NVBENCH_TYPE_AXES(ctcs_types, ctcs_types)) .set_type_axes_names({"In", "Out"}); diff --git a/examples/custom_criterion.cu b/examples/custom_criterion.cu index 4661235..3be3743 100644 --- a/examples/custom_criterion.cu +++ b/examples/custom_criterion.cu @@ -36,10 +36,7 @@ public: protected: // Setup the criterion in the `do_initialize()` method: - virtual void do_initialize() override - { - m_num_samples = 0; - } + virtual void do_initialize() override { m_num_samples = 0; } // Process new measurements in the `add_measurement()` method: virtual void do_add_measurement(nvbench::float64_t /* measurement */) override @@ -52,7 +49,6 @@ protected: { return m_num_samples >= m_params.get_int64("max-samples"); } - }; // Register the criterion with NVBench: @@ -71,7 +67,7 @@ void throughput_bench(nvbench::state &state) state.add_global_memory_writes(num_values); state.exec(nvbench::exec_tag::no_batch, [&input, &output, num_values](nvbench::launch &launch) { - (void) num_values; // clang thinks this is unused... + (void)num_values; // clang thinks this is unused... nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>( thrust::raw_pointer_cast(input.data()), thrust::raw_pointer_cast(output.data()), diff --git a/examples/enums.cu b/examples/enums.cu index fa149ac..ff3bef2 100644 --- a/examples/enums.cu +++ b/examples/enums.cu @@ -17,7 +17,6 @@ */ #include - #include // Enum to use as parameter axis: @@ -68,12 +67,10 @@ void runtime_enum_sweep_string(nvbench::state &state) // Create inputs, etc, configure runtime kernel parameters, etc. // Just a dummy kernel. - state.exec([](nvbench::launch &launch) { - nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); - }); + state.exec( + [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); }); } -NVBENCH_BENCH(runtime_enum_sweep_string) - .add_string_axis("MyEnum", {"A", "B", "C"}); +NVBENCH_BENCH(runtime_enum_sweep_string).add_string_axis("MyEnum", {"A", "B", "C"}); //============================================================================== // Sweep through enum values at runtime using an int64 axis. @@ -97,9 +94,8 @@ void runtime_enum_sweep_int64(nvbench::state &state) // Create inputs, etc, configure runtime kernel parameters, etc. // Just a dummy kernel. - state.exec([](nvbench::launch &launch) { - nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); - }); + state.exec( + [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); }); } NVBENCH_BENCH(runtime_enum_sweep_int64) .add_int64_axis("MyEnum", @@ -178,12 +174,10 @@ void compile_time_enum_sweep(nvbench::state &state, // Template parameters, static dispatch, etc. // Just a dummy kernel. - state.exec([](nvbench::launch &launch) { - nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); - }); + state.exec( + [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); }); } -using MyEnumList = - nvbench::enum_type_list; +using MyEnumList = nvbench::enum_type_list; NVBENCH_BENCH_TYPES(compile_time_enum_sweep, NVBENCH_TYPE_AXES(MyEnumList)) .set_type_axes_names({"MyEnum"}); @@ -199,16 +193,14 @@ NVBENCH_BENCH_TYPES(compile_time_enum_sweep, NVBENCH_TYPE_AXES(MyEnumList)) // * `-12` (struct std::integral_constant) // ``` template -void compile_time_int_sweep(nvbench::state &state, - nvbench::type_list>) +void compile_time_int_sweep(nvbench::state &state, nvbench::type_list>) { // Use IntValue in compile time contexts. // Template parameters, static dispatch, etc. // Just a dummy kernel. - state.exec([](nvbench::launch &launch) { - nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); - }); + state.exec( + [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); }); } using MyInts = nvbench::enum_type_list<0, 16, 4096, -12>; NVBENCH_BENCH_TYPES(compile_time_int_sweep, NVBENCH_TYPE_AXES(MyInts)) diff --git a/examples/exec_tag_sync.cu b/examples/exec_tag_sync.cu index 1366931..02c57c5 100644 --- a/examples/exec_tag_sync.cu +++ b/examples/exec_tag_sync.cu @@ -53,9 +53,7 @@ void sequence_bench(nvbench::state &state) // nvbench::exec_tag::sync indicates that this will implicitly sync: state.exec(nvbench::exec_tag::sync, [&data](nvbench::launch &launch) { - thrust::sequence(thrust::device.on(launch.get_stream()), - data.begin(), - data.end()); + thrust::sequence(thrust::device.on(launch.get_stream()), data.begin(), data.end()); }); } NVBENCH_BENCH(sequence_bench); diff --git a/examples/exec_tag_timer.cu b/examples/exec_tag_timer.cu index e283f43..2eda842 100644 --- a/examples/exec_tag_timer.cu +++ b/examples/exec_tag_timer.cu @@ -23,8 +23,8 @@ // Thrust simplifies memory management, etc: #include -#include #include +#include #include // mod2_inplace performs an in-place mod2 over every element in `data`. `data` @@ -54,7 +54,7 @@ void mod2_inplace(nvbench::state &state) state.exec(nvbench::exec_tag::timer, // Lambda now takes a `timer` argument: [&input, &data, num_values](nvbench::launch &launch, auto &timer) { - (void) num_values; // clang thinks this is unused... + (void)num_values; // clang thinks this is unused... // Reset working data: thrust::copy(thrust::device.on(launch.get_stream()), diff --git a/examples/skip.cu b/examples/skip.cu index fc96656..2757fac 100644 --- a/examples/skip.cu +++ b/examples/skip.cu @@ -72,14 +72,12 @@ NVBENCH_BENCH(runtime_skip) // Two type axes are swept, but configurations where InputType == OutputType are // skipped. template -void skip_overload(nvbench::state &state, - nvbench::type_list) +void skip_overload(nvbench::state &state, nvbench::type_list) { // This is a contrived example that focuses on the skip overloads, so this is // just a sleep kernel: - state.exec([](nvbench::launch &launch) { - nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); - }); + state.exec( + [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); }); } // Overload of skip_overload that is called when InputType == OutputType. template @@ -107,9 +105,8 @@ skip_sfinae(nvbench::state &state, nvbench::type_list) { // This is a contrived example that focuses on the skip overloads, so this is // just a sleep kernel: - state.exec([](nvbench::launch &launch) { - nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); - }); + state.exec( + [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); }); } // Enable this overload if InputType is larger than OutputType template @@ -119,10 +116,8 @@ skip_sfinae(nvbench::state &state, nvbench::type_list) state.skip("sizeof(InputType) > sizeof(OutputType)."); } // The same type_list is used for both inputs/outputs. -using sn_types = nvbench::type_list; +using sn_types = + nvbench::type_list; // Setup benchmark: NVBENCH_BENCH_TYPES(skip_sfinae, NVBENCH_TYPE_AXES(sn_types, sn_types)) .set_type_axes_names({"In", "Out"}); diff --git a/examples/stream.cu b/examples/stream.cu index 20254e5..2373b32 100644 --- a/examples/stream.cu +++ b/examples/stream.cu @@ -52,7 +52,7 @@ void stream_bench(nvbench::state &state) state.set_cuda_stream(nvbench::make_cuda_stream_view(default_stream)); state.exec([&input, &output, num_values](nvbench::launch &) { - (void) num_values; // clang thinks this is unused... + (void)num_values; // clang thinks this is unused... copy(thrust::raw_pointer_cast(input.data()), thrust::raw_pointer_cast(output.data()), num_values); diff --git a/examples/summaries.cu b/examples/summaries.cu index 57428c4..ad4fc06 100644 --- a/examples/summaries.cu +++ b/examples/summaries.cu @@ -26,8 +26,8 @@ void summary_example(nvbench::state &state) { // Fetch parameters and compute duration in seconds: - const auto ms = static_cast(state.get_int64("ms")); - const auto us = static_cast(state.get_int64("us")); + const auto ms = static_cast(state.get_int64("ms")); + const auto us = static_cast(state.get_int64("us")); const auto duration = ms * 1e-3 + us * 1e-6; // Add a new column to the summary table with the derived duration used by the benchmark. diff --git a/examples/throughput.cu b/examples/throughput.cu index 24df6ee..5168a3f 100644 --- a/examples/throughput.cu +++ b/examples/throughput.cu @@ -51,7 +51,7 @@ void throughput_bench(nvbench::state &state) state.add_global_memory_writes(num_values); state.exec([&input, &output, num_values](nvbench::launch &launch) { - (void) num_values; // clang thinks this is unused... + (void)num_values; // clang thinks this is unused... nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>( thrust::raw_pointer_cast(input.data()), thrust::raw_pointer_cast(output.data()), diff --git a/exec/nvbench-ctl.cu b/exec/nvbench-ctl.cu index 3968440..be26d6c 100644 --- a/exec/nvbench-ctl.cu +++ b/exec/nvbench-ctl.cu @@ -1,20 +1,20 @@ /* -* Copyright 2021 NVIDIA Corporation -* -* Licensed under the Apache License, Version 2.0 with the LLVM exception -* (the "License"); you may not use this file except in compliance with -* the License. -* -* You may obtain a copy of the License at -* -* http://llvm.org/foundation/relicensing/LICENSE.txt -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include @@ -24,7 +24,7 @@ int main(int argc, char const *const *argv) try { // If no args, substitute a new argv that prints the version - std::vector alt_argv; + std::vector alt_argv; if (argc == 1) { alt_argv.push_back("--version"); @@ -36,7 +36,7 @@ try NVBENCH_CUDA_CALL(cudaDeviceReset()); return 0; } -catch (std::exception & e) +catch (std::exception &e) { std::cerr << "\nNVBench encountered an error:\n\n" << e.what() << "\n"; return 1; diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index 188c689..7afc900 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -19,13 +19,13 @@ #include #include +#include +#include + #include #include #include -#include -#include - namespace nvbench { diff --git a/nvbench/axis_base.cxx b/nvbench/axis_base.cxx index 166f1ba..f2d4a39 100644 --- a/nvbench/axis_base.cxx +++ b/nvbench/axis_base.cxx @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "axis_base.cuh" +#include namespace nvbench { diff --git a/nvbench/benchmark.cuh b/nvbench/benchmark.cuh index a226070..c985793 100644 --- a/nvbench/benchmark.cuh +++ b/nvbench/benchmark.cuh @@ -18,9 +18,8 @@ #pragma once -#include - #include +#include #include #include diff --git a/nvbench/benchmark_manager.cuh b/nvbench/benchmark_manager.cuh index 51fab18..19406f4 100644 --- a/nvbench/benchmark_manager.cuh +++ b/nvbench/benchmark_manager.cuh @@ -45,7 +45,7 @@ struct benchmark_manager * benchmarks should be done here to avoid creating a CUDA context before we configure the CUDA * environment in `main`. */ - void initialize(); + void initialize(); /** * Register a new benchmark. diff --git a/nvbench/benchmark_manager.cxx b/nvbench/benchmark_manager.cxx index 4ff9fe4..7fb59e1 100644 --- a/nvbench/benchmark_manager.cxx +++ b/nvbench/benchmark_manager.cxx @@ -17,9 +17,8 @@ */ #include - -#include #include +#include #include @@ -37,8 +36,8 @@ benchmark_manager &benchmark_manager::get() void benchmark_manager::initialize() { - const auto& mgr = device_manager::get(); - for (auto& bench : m_benchmarks) + const auto &mgr = device_manager::get(); + for (auto &bench : m_benchmarks) { if (!bench->get_is_cpu_only()) { diff --git a/nvbench/blocking_kernel.cu b/nvbench/blocking_kernel.cu index f347833..9514ee6 100644 --- a/nvbench/blocking_kernel.cu +++ b/nvbench/blocking_kernel.cu @@ -17,12 +17,10 @@ */ #include - #include #include -#include - #include +#include #include diff --git a/nvbench/criterion_manager.cuh b/nvbench/criterion_manager.cuh index 6c60993..65678b8 100644 --- a/nvbench/criterion_manager.cuh +++ b/nvbench/criterion_manager.cuh @@ -24,7 +24,6 @@ #include #include - #include namespace nvbench @@ -40,14 +39,14 @@ public: /** * @return The singleton criterion_manager instance. */ - static criterion_manager& get(); + static criterion_manager &get(); /** * Register a new stopping criterion. */ - nvbench::stopping_criterion_base& add(std::unique_ptr criterion); - nvbench::stopping_criterion_base& get_criterion(const std::string& name); - const nvbench::stopping_criterion_base& get_criterion(const std::string& name) const; + nvbench::stopping_criterion_base &add(std::unique_ptr criterion); + nvbench::stopping_criterion_base &get_criterion(const std::string &name); + const nvbench::stopping_criterion_base &get_criterion(const std::string &name) const; using params_description = std::vector>; params_description get_params_description() const; diff --git a/nvbench/criterion_manager.cxx b/nvbench/criterion_manager.cxx index f4857e9..d2b480c 100644 --- a/nvbench/criterion_manager.cxx +++ b/nvbench/criterion_manager.cxx @@ -41,7 +41,7 @@ criterion_manager &criterion_manager::get() return registry; } -stopping_criterion_base& criterion_manager::get_criterion(const std::string& name) +stopping_criterion_base &criterion_manager::get_criterion(const std::string &name) { auto iter = m_map.find(name); if (iter == m_map.end()) @@ -51,7 +51,8 @@ stopping_criterion_base& criterion_manager::get_criterion(const std::string& nam return *iter->second.get(); } -const nvbench::stopping_criterion_base& criterion_manager::get_criterion(const std::string& name) const +const nvbench::stopping_criterion_base & +criterion_manager::get_criterion(const std::string &name) const { auto iter = m_map.find(name); if (iter == m_map.end()) @@ -69,8 +70,7 @@ stopping_criterion_base &criterion_manager::add(std::unique_ptrsecond.get(); diff --git a/nvbench/csv_printer.cu b/nvbench/csv_printer.cu index a0db080..eba4f74 100644 --- a/nvbench/csv_printer.cu +++ b/nvbench/csv_printer.cu @@ -16,14 +16,12 @@ * limitations under the License. */ -#include - #include #include +#include #include -#include - #include +#include #include @@ -169,7 +167,10 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches) std::size_t remaining = table.m_columns.size(); for (const auto &col : table.m_columns) { - fmt::format_to(std::back_inserter(buffer), "{}{}", col.rows[i], (--remaining == 0) ? "" : ","); + fmt::format_to(std::back_inserter(buffer), + "{}{}", + col.rows[i], + (--remaining == 0) ? "" : ","); } fmt::format_to(std::back_inserter(buffer), "\n"); } diff --git a/nvbench/cuda_timer.cuh b/nvbench/cuda_timer.cuh index e1c6e66..082cdca 100644 --- a/nvbench/cuda_timer.cuh +++ b/nvbench/cuda_timer.cuh @@ -19,7 +19,6 @@ #pragma once #include - #include #include diff --git a/nvbench/cupti_profiler.cxx b/nvbench/cupti_profiler.cxx index 6233ef0..6ce1cf7 100644 --- a/nvbench/cupti_profiler.cxx +++ b/nvbench/cupti_profiler.cxx @@ -17,7 +17,6 @@ */ #include - #include #include @@ -54,7 +53,9 @@ void nvpw_call(const NVPA_Status status) { if (status != NVPA_STATUS_SUCCESS) { - NVBENCH_THROW(std::runtime_error, "NVPW call returned error: {}", static_cast>(status)); + NVBENCH_THROW(std::runtime_error, + "NVPW call returned error: {}", + static_cast>(status)); } } diff --git a/nvbench/detail/entropy_criterion.cuh b/nvbench/detail/entropy_criterion.cuh index b0e4ebe..33d5634 100644 --- a/nvbench/detail/entropy_criterion.cuh +++ b/nvbench/detail/entropy_criterion.cuh @@ -18,9 +18,9 @@ #pragma once -#include -#include #include +#include +#include #include @@ -38,7 +38,7 @@ class entropy_criterion final : public stopping_criterion_base nvbench::detail::ring_buffer m_entropy_tracker{299}; // Used to avoid re-allocating temporary memory - std::vector m_probabilities; + std::vector m_probabilities; nvbench::float64_t compute_entropy(); @@ -49,7 +49,6 @@ protected: virtual void do_initialize() override; virtual void do_add_measurement(nvbench::float64_t measurement) override; virtual bool do_is_finished() override; - }; } // namespace nvbench::detail diff --git a/nvbench/detail/entropy_criterion.cxx b/nvbench/detail/entropy_criterion.cxx index 6d9ba8c..4cc0668 100644 --- a/nvbench/detail/entropy_criterion.cxx +++ b/nvbench/detail/entropy_criterion.cxx @@ -21,7 +21,6 @@ #include - namespace nvbench::detail { @@ -40,7 +39,7 @@ void entropy_criterion::do_initialize() m_freq_tracker.clear(); } -nvbench::float64_t entropy_criterion::compute_entropy() +nvbench::float64_t entropy_criterion::compute_entropy() { const std::size_t n = m_freq_tracker.size(); if (n == 0) @@ -70,15 +69,15 @@ void entropy_criterion::do_add_measurement(nvbench::float64_t measurement) m_total_cuda_time += measurement; { - auto key = measurement; + auto key = measurement; constexpr bool bin_keys = false; - if (bin_keys) + if (bin_keys) { const auto resolution_us = 0.5; - const auto resulution_s = resolution_us / 1'000'000; - const auto epsilon = resulution_s * 2; - key = std::round(key / epsilon) * epsilon; + const auto resulution_s = resolution_us / 1000000; + const auto epsilon = resulution_s * 2; + key = std::round(key / epsilon) * epsilon; } // This approach is about 3x faster than `std::{unordered_,}map` @@ -120,7 +119,7 @@ bool entropy_criterion::do_is_finished() const auto [slope, intercept] = statistics::compute_linear_regression(begin, end, mean); - if (statistics::slope2deg(slope) > m_params.get_float64("max-angle")) + if (statistics::slope2deg(slope) > m_params.get_float64("max-angle")) { return false; } diff --git a/nvbench/detail/measure_cold.cu b/nvbench/detail/measure_cold.cu index 1421241..2bbf0e0 100644 --- a/nvbench/detail/measure_cold.cu +++ b/nvbench/detail/measure_cold.cu @@ -25,13 +25,13 @@ #include #include +#include + #include #include #include #include -#include - namespace nvbench::detail { diff --git a/nvbench/detail/measure_cold.cuh b/nvbench/detail/measure_cold.cuh index 80f8e5a..2138b3d 100644 --- a/nvbench/detail/measure_cold.cuh +++ b/nvbench/detail/measure_cold.cuh @@ -18,8 +18,6 @@ #pragma once -#include - #include #include #include @@ -32,12 +30,13 @@ #include #include #include +#include + +#include #include #include -#include "nvbench/types.cuh" - namespace nvbench { diff --git a/nvbench/detail/measure_cpu_only.cuh b/nvbench/detail/measure_cpu_only.cuh index a42c604..39580bc 100644 --- a/nvbench/detail/measure_cpu_only.cuh +++ b/nvbench/detail/measure_cpu_only.cuh @@ -19,13 +19,12 @@ #pragma once #include +#include +#include #include #include #include -#include -#include - #include #include @@ -66,7 +65,7 @@ protected: nvbench::cpu_timer m_walltime_timer; nvbench::criterion_params m_criterion_params; - nvbench::stopping_criterion_base& m_stopping_criterion; + nvbench::stopping_criterion_base &m_stopping_criterion; bool m_run_once{false}; diff --git a/nvbench/detail/measure_cpu_only.cxx b/nvbench/detail/measure_cpu_only.cxx index 69095a2..4e95dae 100644 --- a/nvbench/detail/measure_cpu_only.cxx +++ b/nvbench/detail/measure_cpu_only.cxx @@ -24,11 +24,11 @@ #include #include +#include + #include #include -#include - namespace nvbench::detail { @@ -36,7 +36,8 @@ measure_cpu_only_base::measure_cpu_only_base(state &exec_state) : m_state{exec_state} , m_launch(m_state.get_cuda_stream()) , m_criterion_params{exec_state.get_criterion_params()} - , m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(exec_state.get_stopping_criterion())} + , m_stopping_criterion{nvbench::criterion_manager::get().get_criterion( + exec_state.get_stopping_criterion())} , m_run_once{exec_state.get_run_once()} , m_min_samples{exec_state.get_min_samples()} , m_skip_time{exec_state.get_skip_time()} @@ -72,7 +73,7 @@ void measure_cpu_only_base::run_trials_prologue() { m_walltime_timer.start(); } void measure_cpu_only_base::record_measurements() { // Update and record timers and counters: - const auto cur_cpu_time = m_cpu_timer.get_duration(); + const auto cur_cpu_time = m_cpu_timer.get_duration(); m_min_cpu_time = std::min(m_min_cpu_time, cur_cpu_time); m_max_cpu_time = std::max(m_max_cpu_time, cur_cpu_time); @@ -188,8 +189,7 @@ void measure_cpu_only_base::generate_summaries() auto &summ = m_state.add_summary("nv/cpu_only/bw/global/bytes_per_second"); summ.set_string("name", "GlobalMem BW"); summ.set_string("hint", "byte_rate"); - summ.set_string("description", - "Number of bytes read/written per second."); + summ.set_string("description", "Number of bytes read/written per second."); summ.set_float64("value", avg_used_gmem_bw); } } // bandwidth @@ -210,9 +210,9 @@ void measure_cpu_only_base::generate_summaries() if (m_max_time_exceeded) { - const auto timeout = m_walltime_timer.get_duration(); + const auto timeout = m_walltime_timer.get_duration(); const auto max_noise = m_criterion_params.get_float64("max-noise"); - const auto min_time = m_criterion_params.get_float64("min-time"); + const auto min_time = m_criterion_params.get_float64("min-time"); if (cpu_noise > max_noise) { diff --git a/nvbench/detail/measure_cupti.cuh b/nvbench/detail/measure_cupti.cuh index ec7b212..9d9f689 100644 --- a/nvbench/detail/measure_cupti.cuh +++ b/nvbench/detail/measure_cupti.cuh @@ -24,13 +24,12 @@ #include #include #include -#include -#include -#include - #include #include #include +#include +#include +#include #include diff --git a/nvbench/detail/measure_hot.cu b/nvbench/detail/measure_hot.cu index 9497122..2a38f16 100644 --- a/nvbench/detail/measure_hot.cu +++ b/nvbench/detail/measure_hot.cu @@ -16,9 +16,8 @@ * limitations under the License. */ -#include - #include +#include #include #include #include diff --git a/nvbench/detail/ring_buffer.cuh b/nvbench/detail/ring_buffer.cuh index 5c00b24..e478eaa 100644 --- a/nvbench/detail/ring_buffer.cuh +++ b/nvbench/detail/ring_buffer.cuh @@ -19,12 +19,11 @@ #pragma once #include - #include +#include #include #include -#include #include namespace nvbench::detail @@ -76,14 +75,14 @@ public: return temp; } - ring_buffer_iterator operator+(difference_type n) const - { - return ring_buffer_iterator(m_index + n, m_capacity, m_ptr); + ring_buffer_iterator operator+(difference_type n) const + { + return ring_buffer_iterator(m_index + n, m_capacity, m_ptr); } - ring_buffer_iterator operator-(difference_type n) const - { - return ring_buffer_iterator(m_index - n, m_capacity, m_ptr); + ring_buffer_iterator operator-(difference_type n) const + { + return ring_buffer_iterator(m_index - n, m_capacity, m_ptr); } difference_type operator-(const ring_buffer_iterator &other) const @@ -121,13 +120,9 @@ private: std::size_t m_index{0}; bool m_full{false}; - std::size_t get_front_index() const - { - return m_full ? m_index : 0; - } + std::size_t get_front_index() const { return m_full ? m_index : 0; } public: - /** * Create a new ring buffer with the requested capacity. */ diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index 26a897a..8a5ef82 100644 --- a/nvbench/detail/state_generator.cxx +++ b/nvbench/detail/state_generator.cxx @@ -16,15 +16,13 @@ * limitations under the License. */ -#include - #include +#include +#include #include #include #include -#include - #include #include #include @@ -165,7 +163,7 @@ void state_generator::build_axis_configs() config.set_string(axis_info.axis, axis.get_input_string(axis_info.index)); } } // type_si - } // type_axis_config generation + } // type_axis_config generation // non_type_axis_config generation { @@ -201,9 +199,9 @@ void state_generator::build_axis_configs() axes.get_string_axis(axis_info.axis).get_value(axis_info.index)); break; } // switch (type) - } // for (axis_info : current_indices) - } // for non_type_sg configs - } // non_type_axis_config generation + } // for (axis_info : current_indices) + } // for non_type_sg configs + } // non_type_axis_config generation } void state_generator::build_states() diff --git a/nvbench/detail/statistics.cuh b/nvbench/detail/statistics.cuh index 522b4f2..719d6f8 100644 --- a/nvbench/detail/statistics.cuh +++ b/nvbench/detail/statistics.cuh @@ -26,12 +26,10 @@ #include #include #include -#include - #include #ifndef M_PI - #define M_PI 3.14159265358979323846 +#define M_PI 3.14159265358979323846 #endif namespace nvbench::detail::statistics @@ -154,7 +152,7 @@ nvbench::float64_t compute_r2(It first, for (std::size_t i = 0; i < n; ++i, ++first) { - const nvbench::float64_t y = *first; + const nvbench::float64_t y = *first; const nvbench::float64_t y_pred = slope * static_cast(i) + intercept; ss_tot += (y - mean_y) * (y - mean_y); @@ -179,19 +177,10 @@ compute_r2(It first, It last, nvbench::float64_t slope, nvbench::float64_t inter return compute_r2(first, last, compute_mean(first, last), slope, intercept); } -inline nvbench::float64_t rad2deg(nvbench::float64_t rad) -{ - return rad * 180.0 / M_PI; -} +inline nvbench::float64_t rad2deg(nvbench::float64_t rad) { return rad * 180.0 / M_PI; } -inline nvbench::float64_t slope2rad(nvbench::float64_t slope) -{ - return std::atan2(slope, 1.0); -} +inline nvbench::float64_t slope2rad(nvbench::float64_t slope) { return std::atan2(slope, 1.0); } -inline nvbench::float64_t slope2deg(nvbench::float64_t slope) -{ - return rad2deg(slope2rad(slope)); -} +inline nvbench::float64_t slope2deg(nvbench::float64_t slope) { return rad2deg(slope2rad(slope)); } } // namespace nvbench::detail::statistics diff --git a/nvbench/detail/stdrel_criterion.cuh b/nvbench/detail/stdrel_criterion.cuh index 5f87e84..8abdb73 100644 --- a/nvbench/detail/stdrel_criterion.cuh +++ b/nvbench/detail/stdrel_criterion.cuh @@ -18,9 +18,9 @@ #pragma once -#include -#include #include +#include +#include #include diff --git a/nvbench/detail/stdrel_criterion.cxx b/nvbench/detail/stdrel_criterion.cxx index a6c5ea8..c0f7ef8 100644 --- a/nvbench/detail/stdrel_criterion.cxx +++ b/nvbench/detail/stdrel_criterion.cxx @@ -29,7 +29,7 @@ stdrel_criterion::stdrel_criterion() void stdrel_criterion::do_initialize() { - m_total_samples = 0; + m_total_samples = 0; m_total_cuda_time = 0.0; m_cuda_times.clear(); m_noise_tracker.clear(); @@ -46,7 +46,7 @@ void stdrel_criterion::do_add_measurement(nvbench::float64_t measurement) const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(), m_cuda_times.cend(), mean_cuda_time); - const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; + const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; if (std::isfinite(cuda_rel_stdev)) { m_noise_tracker.push_back(cuda_rel_stdev); diff --git a/nvbench/detail/throw.cuh b/nvbench/detail/throw.cuh index e3bb9fd..00d05f4 100644 --- a/nvbench/detail/throw.cuh +++ b/nvbench/detail/throw.cuh @@ -19,6 +19,7 @@ #pragma once #include + #include #define NVBENCH_THROW(exception_type, format_str, ...) \ diff --git a/nvbench/detail/timestamps_kernel.cu b/nvbench/detail/timestamps_kernel.cu index 8fa7a64..31b138d 100644 --- a/nvbench/detail/timestamps_kernel.cu +++ b/nvbench/detail/timestamps_kernel.cu @@ -16,13 +16,13 @@ * limitations under the License. */ -#include - #include #include #include #include +#include + #include #include @@ -71,12 +71,11 @@ void timestamps_kernel::record(const nvbench::cuda_stream &stream) int num_sms = 0; NVBENCH_CUDA_CALL(cudaGetDevice(&device_id)); - NVBENCH_CUDA_CALL( - cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device_id)); + NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device_id)); get_timestamps_kernel<<(num_sms), 1, 0, stream.get_stream()>>>( m_device_timestamps, m_device_timestamps + 1); } -} // namespace nvbench +} // namespace nvbench::detail diff --git a/nvbench/detail/type_list_impl.cuh b/nvbench/detail/type_list_impl.cuh index e97aaaa..96ca901 100644 --- a/nvbench/detail/type_list_impl.cuh +++ b/nvbench/detail/type_list_impl.cuh @@ -82,7 +82,7 @@ struct cartesian_product, TL, using tail_prod = typename detail::cartesian_product>::type; using cur = typename detail::prepend_each::type; using next = typename detail::cartesian_product< - nvbench::type_list, TL, TLTail...>>::type; + nvbench::type_list, TL, TLTail...>>::type; using type = decltype(detail::concat(cur{}, next{})); }; diff --git a/nvbench/device_info.cu b/nvbench/device_info.cu index 3b26cdb..b7a6c05 100644 --- a/nvbench/device_info.cu +++ b/nvbench/device_info.cu @@ -16,11 +16,10 @@ * limitations under the License. */ -#include - #include #include #include +#include #include #include diff --git a/nvbench/device_info.cuh b/nvbench/device_info.cuh index 1bb5262..b31a096 100644 --- a/nvbench/device_info.cuh +++ b/nvbench/device_info.cuh @@ -18,17 +18,16 @@ #pragma once -#include - #include #include #include +#include + #include // CHAR_BIT #include -#include - #include +#include // forward declare this for internal storage struct nvmlDevice_st; diff --git a/nvbench/device_manager.cu b/nvbench/device_manager.cu index a70a18c..4ba40f7 100644 --- a/nvbench/device_manager.cu +++ b/nvbench/device_manager.cu @@ -16,13 +16,12 @@ * limitations under the License. */ -#include - -#include - #include #include #include +#include + +#include namespace nvbench { @@ -45,13 +44,13 @@ device_manager::device_manager() } } -const nvbench::device_info &device_manager::get_device(int id) -{ - if (id < 0) +const nvbench::device_info &device_manager::get_device(int id) +{ + if (id < 0) { NVBENCH_THROW(std::runtime_error, "Negative index: {}.", id); } - return m_devices.at(static_cast(id)); + return m_devices.at(static_cast(id)); } } // namespace nvbench diff --git a/nvbench/exec_tag.cuh b/nvbench/exec_tag.cuh index 26691b0..83bf271 100644 --- a/nvbench/exec_tag.cuh +++ b/nvbench/exec_tag.cuh @@ -101,10 +101,10 @@ using no_gpu_t = tag; using no_batch_t = tag; using modifier_mask_t = tag; -using hot_t = tag; -using cold_t = tag; -using cpu_only_t = tag; -using measure_mask_t = tag; +using hot_t = tag; +using cold_t = tag; +using cpu_only_t = tag; +using measure_mask_t = tag; constexpr inline none_t none; constexpr inline timer_t timer; diff --git a/nvbench/float64_axis.cuh b/nvbench/float64_axis.cuh index ef7b089..5e07b55 100644 --- a/nvbench/float64_axis.cuh +++ b/nvbench/float64_axis.cuh @@ -19,7 +19,6 @@ #pragma once #include - #include #include @@ -40,7 +39,10 @@ struct float64_axis final : public axis_base [[nodiscard]] nvbench::float64_t get_value(std::size_t i) const { return m_values[i]; } private: - std::unique_ptr do_clone() const final { return std::make_unique(*this); } + std::unique_ptr do_clone() const final + { + return std::make_unique(*this); + } std::size_t do_get_size() const final { return m_values.size(); } std::string do_get_input_string(std::size_t i) const final; std::string do_get_description(std::size_t i) const final; diff --git a/nvbench/int64_axis.cuh b/nvbench/int64_axis.cuh index adc95d1..d646309 100644 --- a/nvbench/int64_axis.cuh +++ b/nvbench/int64_axis.cuh @@ -19,7 +19,6 @@ #pragma once #include - #include #include diff --git a/nvbench/int64_axis.cxx b/nvbench/int64_axis.cxx index 599c388..16ca2e7 100644 --- a/nvbench/int64_axis.cxx +++ b/nvbench/int64_axis.cxx @@ -16,9 +16,8 @@ * limitations under the License. */ -#include - #include +#include #include diff --git a/nvbench/internal/markdown_table.cuh b/nvbench/internal/markdown_table.cuh index 518f57b..c484dbe 100644 --- a/nvbench/internal/markdown_table.cuh +++ b/nvbench/internal/markdown_table.cuh @@ -19,7 +19,6 @@ #pragma once #include - #include #include diff --git a/nvbench/internal/nvml.cuh b/nvbench/internal/nvml.cuh index 05c6764..656f5e1 100644 --- a/nvbench/internal/nvml.cuh +++ b/nvbench/internal/nvml.cuh @@ -21,12 +21,12 @@ #include #include -#include - #ifdef NVBENCH_HAS_NVML #include #endif // NVBENCH_HAS_NVML +#include + #include namespace nvbench::nvml @@ -38,6 +38,7 @@ struct NVMLLifetimeManager { NVMLLifetimeManager(); ~NVMLLifetimeManager(); + private: bool m_inited{false}; }; diff --git a/nvbench/json_printer.cu b/nvbench/json_printer.cu index f7e337a..f3010a1 100644 --- a/nvbench/json_printer.cu +++ b/nvbench/json_printer.cu @@ -16,24 +16,22 @@ * limitations under the License. */ -#include - #include #include #include +#include #include #include #include +#include #include #include #include -#include +#include #include -#include - #include #include #include @@ -105,7 +103,7 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values) default: NVBENCH_THROW(std::runtime_error, "{}", "Unrecognized value type."); } // end switch (value type) - } // end foreach value name + } // end foreach value name } } // end namespace @@ -225,27 +223,26 @@ static void add_devices_section(nlohmann::ordered_json &root) auto &devices = root["devices"]; for (const auto &dev_info : nvbench::device_manager::get().get_devices()) { - auto &device = devices.emplace_back(); - device["id"] = dev_info.get_id(); - device["name"] = dev_info.get_name(); - device["sm_version"] = dev_info.get_sm_version(); - device["ptx_version"] = dev_info.get_ptx_version(); - device["sm_default_clock_rate"] = dev_info.get_sm_default_clock_rate(); - device["number_of_sms"] = dev_info.get_number_of_sms(); - device["max_blocks_per_sm"] = dev_info.get_max_blocks_per_sm(); - device["max_threads_per_sm"] = dev_info.get_max_threads_per_sm(); - device["max_threads_per_block"] = dev_info.get_max_threads_per_block(); - device["registers_per_sm"] = dev_info.get_registers_per_sm(); - device["registers_per_block"] = dev_info.get_registers_per_block(); - device["global_memory_size"] = dev_info.get_global_memory_size(); - device["global_memory_bus_peak_clock_rate"] = - dev_info.get_global_memory_bus_peak_clock_rate(); - device["global_memory_bus_width"] = dev_info.get_global_memory_bus_width(); - device["global_memory_bus_bandwidth"] = dev_info.get_global_memory_bus_bandwidth(); - device["l2_cache_size"] = dev_info.get_l2_cache_size(); - device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm(); - device["shared_memory_per_block"] = dev_info.get_shared_memory_per_block(); - device["ecc_state"] = dev_info.get_ecc_state(); + auto &device = devices.emplace_back(); + device["id"] = dev_info.get_id(); + device["name"] = dev_info.get_name(); + device["sm_version"] = dev_info.get_sm_version(); + device["ptx_version"] = dev_info.get_ptx_version(); + device["sm_default_clock_rate"] = dev_info.get_sm_default_clock_rate(); + device["number_of_sms"] = dev_info.get_number_of_sms(); + device["max_blocks_per_sm"] = dev_info.get_max_blocks_per_sm(); + device["max_threads_per_sm"] = dev_info.get_max_threads_per_sm(); + device["max_threads_per_block"] = dev_info.get_max_threads_per_block(); + device["registers_per_sm"] = dev_info.get_registers_per_sm(); + device["registers_per_block"] = dev_info.get_registers_per_block(); + device["global_memory_size"] = dev_info.get_global_memory_size(); + device["global_memory_bus_peak_clock_rate"] = dev_info.get_global_memory_bus_peak_clock_rate(); + device["global_memory_bus_width"] = dev_info.get_global_memory_bus_width(); + device["global_memory_bus_bandwidth"] = dev_info.get_global_memory_bus_bandwidth(); + device["l2_cache_size"] = dev_info.get_l2_cache_size(); + device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm(); + device["shared_memory_per_block"] = dev_info.get_shared_memory_per_block(); + device["ecc_state"] = dev_info.get_ecc_state(); } } @@ -298,8 +295,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) false; #endif } // "nvbench" - } // "version" - } // "meta" + } // "version" + } // "meta" add_devices_section(root); @@ -362,8 +359,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) default: break; } // end switch (axis type) - } // end foreach axis value - } // end foreach axis + } // end foreach axis value + } // end foreach axis auto &states = bench["states"]; for (const auto &exec_state : bench_ptr->get_states()) @@ -431,8 +428,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) continue; } } // end foreach exec_state - } // end foreach benchmark - } // "benchmarks" + } // end foreach benchmark + } // "benchmarks" m_ostream << root.dump(2) << "\n"; } @@ -492,7 +489,7 @@ void json_printer::do_print_benchmark_list(const benchmark_vector &benches) default: break; } // end switch (axis type) - } // end foreach axis value + } // end foreach axis value } } // end foreach bench diff --git a/nvbench/json_printer.cuh b/nvbench/json_printer.cuh index 8457687..ae361ff 100644 --- a/nvbench/json_printer.cuh +++ b/nvbench/json_printer.cuh @@ -19,7 +19,6 @@ #pragma once #include - #include #include diff --git a/nvbench/markdown_printer.cu b/nvbench/markdown_printer.cu index db3d871..31c0bcf 100644 --- a/nvbench/markdown_printer.cu +++ b/nvbench/markdown_printer.cu @@ -16,15 +16,13 @@ * limitations under the License. */ -#include - #include #include +#include +#include #include #include -#include - #include #include @@ -72,8 +70,12 @@ void markdown_printer::do_print_device_info() "* Max Shared Memory: {} KiB/SM, {} KiB/Block\n", device.get_shared_memory_per_sm() / 1024, device.get_shared_memory_per_block() / 1024); - fmt::format_to(std::back_inserter(buffer), "* L2 Cache Size: {} KiB\n", device.get_l2_cache_size() / 1024); - fmt::format_to(std::back_inserter(buffer), "* Maximum Active Blocks: {}/SM\n", device.get_max_blocks_per_sm()); + fmt::format_to(std::back_inserter(buffer), + "* L2 Cache Size: {} KiB\n", + device.get_l2_cache_size() / 1024); + fmt::format_to(std::back_inserter(buffer), + "* Maximum Active Blocks: {}/SM\n", + device.get_max_blocks_per_sm()); fmt::format_to(std::back_inserter(buffer), "* Maximum Active Threads: {}/SM, {}/Block\n", device.get_max_threads_per_sm(), @@ -82,7 +84,9 @@ void markdown_printer::do_print_device_info() "* Available Registers: {}/SM, {}/Block\n", device.get_registers_per_sm(), device.get_registers_per_block()); - fmt::format_to(std::back_inserter(buffer), "* ECC Enabled: {}\n", device.get_ecc_state() ? "Yes" : "No"); + fmt::format_to(std::back_inserter(buffer), + "* ECC Enabled: {}\n", + device.get_ecc_state() ? "Yes" : "No"); fmt::format_to(std::back_inserter(buffer), "\n"); } m_ostream << fmt::to_string(buffer); @@ -191,9 +195,12 @@ void markdown_printer::do_print_benchmark_list(const printer_base::benchmark_vec { desc = fmt::format(" ({})", desc); } - fmt::format_to(std::back_inserter(buffer), " * `{}`{}\n", axis_ptr->get_input_string(i), desc); + fmt::format_to(std::back_inserter(buffer), + " * `{}`{}\n", + axis_ptr->get_input_string(i), + desc); } // end foreach value - } // end foreach axis + } // end foreach axis fmt::format_to(std::back_inserter(buffer), "\n"); } // end foreach bench diff --git a/nvbench/named_values.cxx b/nvbench/named_values.cxx index e672301..f5042ab 100644 --- a/nvbench/named_values.cxx +++ b/nvbench/named_values.cxx @@ -16,10 +16,9 @@ * limitations under the License. */ -#include - #include #include +#include #include diff --git a/nvbench/nvbench.cuh b/nvbench/nvbench.cuh index 3fb933f..bf1eb21 100644 --- a/nvbench/nvbench.cuh +++ b/nvbench/nvbench.cuh @@ -24,8 +24,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/nvbench/option_parser.cu b/nvbench/option_parser.cu index eef46f8..0341075 100644 --- a/nvbench/option_parser.cu +++ b/nvbench/option_parser.cu @@ -34,6 +34,8 @@ #include #include +#include + #include #include #include @@ -44,12 +46,10 @@ #include #include #include +#include #include #include -#include -#include - namespace { diff --git a/nvbench/printer_base.cuh b/nvbench/printer_base.cuh index 13cf803..444752e 100644 --- a/nvbench/printer_base.cuh +++ b/nvbench/printer_base.cuh @@ -191,9 +191,9 @@ protected: virtual void do_process_bulk_data_float64(nvbench::state &, const std::string &, const std::string &, - const std::vector &){}; + const std::vector &) {}; - virtual void do_print_benchmark_list(const benchmark_vector &) + virtual void do_print_benchmark_list(const benchmark_vector &) { throw std::runtime_error{"nvbench::do_print_benchmark_list is not supported by this printer."}; } diff --git a/nvbench/runner.cuh b/nvbench/runner.cuh index f32b222..3924296 100644 --- a/nvbench/runner.cuh +++ b/nvbench/runner.cuh @@ -19,7 +19,6 @@ #pragma once #include - #include #include diff --git a/nvbench/runner.cxx b/nvbench/runner.cxx index 93cedf5..09ddb46 100644 --- a/nvbench/runner.cxx +++ b/nvbench/runner.cxx @@ -16,10 +16,9 @@ * limitations under the License. */ -#include - #include #include +#include #include #include diff --git a/nvbench/state.cxx b/nvbench/state.cxx index 49af11b..df4ec91 100644 --- a/nvbench/state.cxx +++ b/nvbench/state.cxx @@ -20,13 +20,13 @@ #include #include +#include +#include + #include #include #include -#include -#include - namespace nvbench { diff --git a/nvbench/stopping_criterion.cuh b/nvbench/stopping_criterion.cuh index 006a699..5daaaa5 100644 --- a/nvbench/stopping_criterion.cuh +++ b/nvbench/stopping_criterion.cuh @@ -21,19 +21,21 @@ #include #include -#include - #include +#include #include namespace nvbench { -namespace detail +namespace detail { -constexpr nvbench::float64_t compat_min_time() { return 0.5; } // 0.5 seconds -constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relative standard deviation +constexpr nvbench::float64_t compat_min_time() { return 0.5; } // 0.5 seconds +constexpr nvbench::float64_t compat_max_noise() +{ + return 0.005; +} // 0.5% relative standard deviation } // namespace detail @@ -43,6 +45,7 @@ constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relati class criterion_params { nvbench::named_values m_named_values; + public: criterion_params(); criterion_params(std::initializer_list>); @@ -96,7 +99,7 @@ public: * * This method is called once per benchmark run, before any measurements are provided. */ - void initialize(const criterion_params ¶ms) + void initialize(const criterion_params ¶ms) { m_params.set_from(params); this->do_initialize(); @@ -105,18 +108,12 @@ public: /** * Add the latest measurement to the criterion */ - void add_measurement(nvbench::float64_t measurement) - { - this->do_add_measurement(measurement); - } + void add_measurement(nvbench::float64_t measurement) { this->do_add_measurement(measurement); } /** * Check if the criterion has been met for all measurements processed by `add_measurement` */ - bool is_finished() - { - return this->do_is_finished(); - } + bool is_finished() { return this->do_is_finished(); } protected: /** diff --git a/nvbench/stopping_criterion.cxx b/nvbench/stopping_criterion.cxx index 976a1a7..f6a4ae5 100644 --- a/nvbench/stopping_criterion.cxx +++ b/nvbench/stopping_criterion.cxx @@ -16,10 +16,8 @@ * limitations under the License. */ -#include - #include - +#include namespace nvbench { @@ -62,7 +60,7 @@ void criterion_params::set_from(const criterion_params &other) void criterion_params::set_int64(std::string name, nvbench::int64_t value) { - if (m_named_values.has_value(name)) + if (m_named_values.has_value(name)) { m_named_values.remove_value(name); } @@ -72,7 +70,7 @@ void criterion_params::set_int64(std::string name, nvbench::int64_t value) void criterion_params::set_float64(std::string name, nvbench::float64_t value) { - if (m_named_values.has_value(name)) + if (m_named_values.has_value(name)) { m_named_values.remove_value(name); } @@ -82,7 +80,7 @@ void criterion_params::set_float64(std::string name, nvbench::float64_t value) void criterion_params::set_string(std::string name, std::string value) { - if (m_named_values.has_value(name)) + if (m_named_values.has_value(name)) { m_named_values.remove_value(name); } @@ -110,15 +108,11 @@ std::string criterion_params::get_string(const std::string &name) const return m_named_values.get_string(name); } -std::vector criterion_params::get_names() const -{ - return m_named_values.get_names(); -} +std::vector criterion_params::get_names() const { return m_named_values.get_names(); } nvbench::named_values::type criterion_params::get_type(const std::string &name) const { return m_named_values.get_type(name); } - -} // namespace nvbench::detail +} // namespace nvbench diff --git a/nvbench/string_axis.cuh b/nvbench/string_axis.cuh index a8af16e..7491870 100644 --- a/nvbench/string_axis.cuh +++ b/nvbench/string_axis.cuh @@ -19,7 +19,6 @@ #pragma once #include - #include #include diff --git a/nvbench/type_axis.cuh b/nvbench/type_axis.cuh index 3a4c59a..3349ed9 100644 --- a/nvbench/type_axis.cuh +++ b/nvbench/type_axis.cuh @@ -19,7 +19,6 @@ #pragma once #include - #include #include diff --git a/nvbench/type_axis.cxx b/nvbench/type_axis.cxx index f89ec1d..b706971 100644 --- a/nvbench/type_axis.cxx +++ b/nvbench/type_axis.cxx @@ -16,9 +16,8 @@ * limitations under the License. */ -#include - #include +#include #include #include diff --git a/nvbench/type_list.cuh b/nvbench/type_list.cuh index e52425a..91118a8 100644 --- a/nvbench/type_list.cuh +++ b/nvbench/type_list.cuh @@ -18,7 +18,7 @@ #pragma once -#include "detail/type_list_impl.cuh" +#include #include #include diff --git a/nvbench/type_strings.cxx b/nvbench/type_strings.cxx index 5046db3..2aa9ac8 100644 --- a/nvbench/type_strings.cxx +++ b/nvbench/type_strings.cxx @@ -27,11 +27,11 @@ #endif #ifdef NVBENCH_CXXABI_DEMANGLE -#include - #include #include +#include + namespace { struct free_wrapper diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..86f9909 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,24 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[tool.ruff] +target-version = "py310" +fix = true +show-fixes = true +exclude = ["docs/tools"] + +[tool.ruff.lint] +extend-select = ["I"] + +[tool.codespell] +# To run codespell interactively and fix errors that pre-commit reports, try +# `codespell -i 3 -w -H`. This will run with interactive review (-i 3), writes +# changes to disk (-w), and includes hidden files (-H). +# Note: pre-commit passes explicit lists of files here, which this skip file +# list doesn't override - the skip list is only to allow you to run codespell +# interactively. +skip = "./.git,./build,./CITATION.md" +# ignore short words, and typename parameters like OffsetT +ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" +ignore-words-list = "inout,imovable,optionN,aCount,quitted,Invokable,countr,unexpect,numer,euclidian,couldn,OffsetT,FromM" +builtin = "clear" +quiet-level = 3 diff --git a/scripts/nvbench_compare.py b/scripts/nvbench_compare.py index 2f4fae1..d5ff0b5 100755 --- a/scripts/nvbench_compare.py +++ b/scripts/nvbench_compare.py @@ -5,12 +5,11 @@ import math import os import sys -from colorama import Fore - import tabulate - +from colorama import Fore from nvbench_json import reader + # Parse version string into tuple, "x.y.z" -> (x, y, z) def version_tuple(v): return tuple(map(int, (v.split(".")))) @@ -139,15 +138,14 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot): colalign.append("center") for device_id in device_ids: - rows = [] - plot_data = {'cmp': {}, 'ref': {}, 'cmp_noise': {}, 'ref_noise': {}} + plot_data = {"cmp": {}, "ref": {}, "cmp_noise": {}, "ref_noise": {}} for cmp_state in cmp_states: cmp_state_name = cmp_state["name"] - ref_state = next(filter(lambda st: st["name"] == cmp_state_name, - ref_states), - None) + ref_state = next( + filter(lambda st: st["name"] == cmp_state_name, ref_states), None + ) if not ref_state: continue @@ -158,9 +156,7 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot): row = [] for axis_value in axis_values: axis_value_name = axis_value["name"] - row.append(format_axis_value(axis_value_name, - axis_value, - axes)) + row.append(format_axis_value(axis_value_name, axis_value, axes)) cmp_summaries = cmp_state["summaries"] ref_summaries = ref_state["summaries"] @@ -171,23 +167,37 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot): def lookup_summary(summaries, tag): return next(filter(lambda s: s["tag"] == tag, summaries), None) - cmp_time_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/mean") - ref_time_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/mean") - cmp_noise_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/stdev/relative") - ref_noise_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/stdev/relative") + cmp_time_summary = lookup_summary( + cmp_summaries, "nv/cold/time/gpu/mean" + ) + ref_time_summary = lookup_summary( + ref_summaries, "nv/cold/time/gpu/mean" + ) + cmp_noise_summary = lookup_summary( + cmp_summaries, "nv/cold/time/gpu/stdev/relative" + ) + ref_noise_summary = lookup_summary( + ref_summaries, "nv/cold/time/gpu/stdev/relative" + ) # TODO: Use other timings, too. Maybe multiple rows, with a # "Timing" column + values "CPU/GPU/Batch"? - if not all([cmp_time_summary, - ref_time_summary, - cmp_noise_summary, - ref_noise_summary]): + if not all( + [ + cmp_time_summary, + ref_time_summary, + cmp_noise_summary, + ref_noise_summary, + ] + ): continue def extract_value(summary): summary_data = summary["data"] - value_data = next(filter(lambda v: v["name"] == "value", summary_data)) - assert(value_data["type"] == "float64") + value_data = next( + filter(lambda v: v["name"] == "value", summary_data) + ) + assert value_data["type"] == "float64" return value_data["value"] cmp_time = extract_value(cmp_time_summary) @@ -218,23 +228,27 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot): if plot: axis_name = [] axis_value = "--" - for aid in range(len(axis_values)): + for aid in range(len(axis_values)): if axis_values[aid]["name"] != plot: - axis_name.append("{} = {}".format(axis_values[aid]["name"], axis_values[aid]["value"])) + axis_name.append( + "{} = {}".format( + axis_values[aid]["name"], axis_values[aid]["value"] + ) + ) else: - axis_value = float(axis_values[aid]["value"]) - axis_name = ', '.join(axis_name) + axis_value = float(axis_values[aid]["value"]) + axis_name = ", ".join(axis_name) - if axis_name not in plot_data['cmp']: - plot_data['cmp'][axis_name] = {} - plot_data['ref'][axis_name] = {} - plot_data['cmp_noise'][axis_name] = {} - plot_data['ref_noise'][axis_name] = {} + if axis_name not in plot_data["cmp"]: + plot_data["cmp"][axis_name] = {} + plot_data["ref"][axis_name] = {} + plot_data["cmp_noise"][axis_name] = {} + plot_data["ref_noise"][axis_name] = {} - plot_data['cmp'][axis_name][axis_value] = cmp_time - plot_data['ref'][axis_name][axis_value] = ref_time - plot_data['cmp_noise'][axis_name][axis_value] = cmp_noise - plot_data['ref_noise'][axis_name][axis_value] = ref_noise + plot_data["cmp"][axis_name][axis_value] = cmp_time + plot_data["ref"][axis_name][axis_value] = ref_time + plot_data["cmp_noise"][axis_name][axis_value] = cmp_noise + plot_data["ref_noise"][axis_name][axis_value] = ref_noise global config_count global unknown_count @@ -273,14 +287,13 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot): print("## [%d] %s\n" % (device["id"], device["name"])) # colalign and github format require tabulate 0.8.3 if tabulate_version >= (0, 8, 3): - print(tabulate.tabulate(rows, - headers=headers, - colalign=colalign, - tablefmt="github")) + print( + tabulate.tabulate( + rows, headers=headers, colalign=colalign, tablefmt="github" + ) + ) else: - print(tabulate.tabulate(rows, - headers=headers, - tablefmt="markdown")) + print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown")) print("") @@ -295,18 +308,17 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot): x = [float(x) for x in plot_data[key][axis].keys()] y = list(plot_data[key][axis].values()) - noise = list(plot_data[key + '_noise'][axis].values()) + noise = list(plot_data[key + "_noise"][axis].values()) top = [y[i] + y[i] * noise[i] for i in range(len(x))] bottom = [y[i] - y[i] * noise[i] for i in range(len(x))] - p = plt.plot(x, y, shape, marker='o', label=label) + p = plt.plot(x, y, shape, marker="o", label=label) plt.fill_between(x, bottom, top, color=p[0].get_color(), alpha=0.1) - - for axis in plot_data['cmp'].keys(): - plot_line('cmp', '-', axis) - plot_line('ref', '--', axis + ' ref') + for axis in plot_data["cmp"].keys(): + plot_line("cmp", "-", axis) + plot_line("ref", "--", axis + " ref") plt.legend() plt.show() @@ -314,11 +326,17 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot): def main(): help_text = "%(prog)s [reference.json compare.json | reference_dir/ compare_dir/]" - parser = argparse.ArgumentParser(prog='nvbench_compare', usage=help_text) - parser.add_argument('--threshold-diff', type=float, dest='threshold', default=0.0, - help='only show benchmarks where percentage diff is >= THRESHOLD') - parser.add_argument('--plot-along', type=str, dest='plot', default=None, - help='plot results') + parser = argparse.ArgumentParser(prog="nvbench_compare", usage=help_text) + parser.add_argument( + "--threshold-diff", + type=float, + dest="threshold", + default=0.0, + help="only show benchmarks where percentage diff is >= THRESHOLD", + ) + parser.add_argument( + "--plot-along", type=str, dest="plot", default=None, help="plot results" + ) args, files_or_dirs = parser.parse_known_args() print(files_or_dirs) @@ -336,14 +354,17 @@ def main(): continue r = os.path.join(files_or_dirs[0], f) c = os.path.join(files_or_dirs[1], f) - if os.path.isfile(r) and os.path.isfile(c) and \ - os.path.getsize(r) > 0 and os.path.getsize(c) > 0: + if ( + os.path.isfile(r) + and os.path.isfile(c) + and os.path.getsize(r) > 0 + and os.path.getsize(c) > 0 + ): to_compare.append((r, c)) else: to_compare = [(files_or_dirs[0], files_or_dirs[1])] for ref, comp in to_compare: - ref_root = reader.read_file(ref) cmp_root = reader.read_file(comp) @@ -355,7 +376,9 @@ def main(): print("Device sections do not match.") sys.exit(1) - compare_benches(ref_root["benchmarks"], cmp_root["benchmarks"], args.threshold, args.plot) + compare_benches( + ref_root["benchmarks"], cmp_root["benchmarks"], args.threshold, args.plot + ) print("# Summary\n") print("- Total Matches: %d" % config_count) @@ -365,5 +388,5 @@ def main(): return failure_count -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/scripts/nvbench_histogram.py b/scripts/nvbench_histogram.py index 5c37d29..4eb6155 100755 --- a/scripts/nvbench_histogram.py +++ b/scripts/nvbench_histogram.py @@ -1,19 +1,19 @@ #!/usr/bin/env python -import numpy as np -import pandas as pd - -import matplotlib.pyplot as plt -import seaborn as sns import argparse import os import sys +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns from nvbench_json import reader + def parse_files(): help_text = "%(prog)s [nvbench.out.json | dir/] ..." - parser = argparse.ArgumentParser(prog='nvbench_histogram', usage=help_text) + parser = argparse.ArgumentParser(prog="nvbench_histogram", usage=help_text) args, files_or_dirs = parser.parse_known_args() @@ -41,14 +41,14 @@ def parse_files(): def extract_filename(summary): summary_data = summary["data"] value_data = next(filter(lambda v: v["name"] == "filename", summary_data)) - assert(value_data["type"] == "string") + assert value_data["type"] == "string" return value_data["value"] def extract_size(summary): summary_data = summary["data"] value_data = next(filter(lambda v: v["name"] == "size", summary_data)) - assert(value_data["type"] == "int64") + assert value_data["type"] == "int64" return int(value_data["value"]) @@ -57,9 +57,10 @@ def parse_samples_meta(filename, state): if not summaries: return None, None - summary = next(filter(lambda s: s["tag"] == "nv/json/bin:nv/cold/sample_times", - summaries), - None) + summary = next( + filter(lambda s: s["tag"] == "nv/json/bin:nv/cold/sample_times", summaries), + None, + ) if not summary: return None, None @@ -81,7 +82,7 @@ def parse_samples(filename, state): with open(samples_filename, "rb") as f: samples = np.fromfile(f, " (x, y, z) @@ -39,7 +38,8 @@ def format_walltime(seconds_in): "{:0>2d}:".format(h) if h > 1e-9 else "", "{:0>2d}:".format(m) if (h > 1e-9 or m > 1e-9) else "", "{:0>2d}.".format(s) if (h > 1e-9 or m > 1e-9) else "{:d}.".format(s), - "{:0>3d}".format(ms)) + "{:0>3d}".format(ms), + ) def format_percentage(percentage): @@ -58,7 +58,7 @@ measure_column_names = {"cold": "Isolated", "batch": "Batch", "cupti": "CUPTI"} def init_measures(): out = {} for name in measure_names: - out[name] = 0. + out[name] = 0.0 return out @@ -67,17 +67,17 @@ def get_measures(state): times = {} for name in measure_names: measure_walltime_tag = "nv/{}/walltime".format(name) - summary = next(filter(lambda s: s["tag"] == measure_walltime_tag, - summaries), - None) + summary = next( + filter(lambda s: s["tag"] == measure_walltime_tag, summaries), None + ) if not summary: continue walltime_data = next(filter(lambda d: d["name"] == "value", summary["data"])) - assert(walltime_data["type"] == "float64") + assert walltime_data["type"] == "float64" walltime = walltime_data["value"] walltime = float(walltime) - times[name] = walltime if walltime else 0. + times[name] = walltime if walltime else 0.0 return times @@ -87,7 +87,7 @@ def merge_measures(target, src): def sum_measures(measures): - total_time = 0. + total_time = 0.0 for time in measures.values(): total_time += time return total_time @@ -194,20 +194,21 @@ def print_overview_section(data): # colalign and github format require tabulate 0.8.3 if tabulate_version >= (0, 8, 3): - print(tabulate.tabulate(rows, - headers=headers, - colalign=colalign, - tablefmt="github")) + print( + tabulate.tabulate( + rows, headers=headers, colalign=colalign, tablefmt="github" + ) + ) else: - print(tabulate.tabulate(rows, - headers=headers, - tablefmt="markdown")) + print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown")) print() # append_data_row_lambda args: (row_list, name, items[name]) -def print_measures_table(headers, colalign, items, total_measures, append_item_row_lambda): +def print_measures_table( + headers, colalign, items, total_measures, append_item_row_lambda +): total_time = sum_measures(total_measures) active_measures = get_active_measure_names(total_measures) num_user_columns = len(headers) @@ -248,14 +249,13 @@ def print_measures_table(headers, colalign, items, total_measures, append_item_r # colalign and github format require tabulate 0.8.3 if tabulate_version >= (0, 8, 3): - print(tabulate.tabulate(rows, - headers=headers, - colalign=colalign, - tablefmt="github")) + print( + tabulate.tabulate( + rows, headers=headers, colalign=colalign, tablefmt="github" + ) + ) else: - print(tabulate.tabulate(rows, - headers=headers, - tablefmt="markdown")) + print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown")) def print_files_section(data): @@ -319,7 +319,7 @@ def print_bench_section(bench_name, bench): def main(): help_text = "%(prog)s [nvbench.out.json | dir/]..." - parser = argparse.ArgumentParser(prog='nvbench_walltime', usage=help_text) + parser = argparse.ArgumentParser(prog="nvbench_walltime", usage=help_text) args, files_or_dirs = parser.parse_known_args() @@ -353,5 +353,5 @@ def main(): print_files_section(data) -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/testing/axes_metadata.cu b/testing/axes_metadata.cu index cf4d93a..9e54660 100644 --- a/testing/axes_metadata.cu +++ b/testing/axes_metadata.cu @@ -17,22 +17,19 @@ */ #include - #include #include #include -#include "test_asserts.cuh" - #include #include #include -using int_list = nvbench::type_list; +#include "test_asserts.cuh" + +using int_list = + nvbench::type_list; using float_list = nvbench::type_list; @@ -110,7 +107,6 @@ void test_default_type_axes_names() ASSERT(axes.get_type_axis(4).get_name() == "T4"); ASSERT(axes.get_type_axis(4).get_axis_index() == 4); } - } void test_type_axes() @@ -138,8 +134,7 @@ void test_type_axes() fmt::format_to(std::back_inserter(buffer), " - {}{}\n", input_string, - description.empty() ? "" - : fmt::format(" ({})", description)); + description.empty() ? "" : fmt::format(" ({})", description)); } } @@ -157,9 +152,8 @@ Axis: Other )expected"; const std::string test = fmt::to_string(buffer); - const auto diff = - std::mismatch(ref.cbegin(), ref.cend(), test.cbegin(), test.cend()); - const auto idx = static_cast(diff.second - test.cbegin()); + const auto diff = std::mismatch(ref.cbegin(), ref.cend(), test.cbegin(), test.cend()); + const auto idx = static_cast(diff.second - test.cbegin()); ASSERT_MSG(test == ref, "Differs at character {}.\n" "Expected:\n\"{}\"\n\n" @@ -189,9 +183,7 @@ void test_float64_axes() void test_int64_axes() { nvbench::axes_metadata axes; - axes.add_int64_axis("I64 Axis", - {10, 11, 12, 13, 14}, - nvbench::int64_axis_flags::none); + axes.add_int64_axis("I64 Axis", {10, 11, 12, 13, 14}, nvbench::int64_axis_flags::none); ASSERT(axes.get_axes().size() == 1); const auto &axis = axes.get_int64_axis("I64 Axis"); ASSERT(axis.get_size() == 5); @@ -205,9 +197,7 @@ void test_int64_axes() void test_int64_power_of_two_axes() { nvbench::axes_metadata axes; - axes.add_int64_axis("I64 POT Axis", - {1, 2, 3, 4, 5}, - nvbench::int64_axis_flags::power_of_two); + axes.add_int64_axis("I64 POT Axis", {1, 2, 3, 4, 5}, nvbench::int64_axis_flags::power_of_two); ASSERT(axes.get_axes().size() == 1); const auto &axis = axes.get_int64_axis("I64 POT Axis"); ASSERT(axis.get_size() == 5); diff --git a/testing/benchmark.cu b/testing/benchmark.cu index 9581b12..dfaf468 100644 --- a/testing/benchmark.cu +++ b/testing/benchmark.cu @@ -17,7 +17,6 @@ */ #include - #include #include #include @@ -25,8 +24,6 @@ #include #include -#include "test_asserts.cuh" - #include #include @@ -34,6 +31,8 @@ #include #include +#include "test_asserts.cuh" + template std::vector sort(std::vector &&vec) { @@ -61,34 +60,26 @@ void no_op_generator(nvbench::state &state) NVBENCH_DEFINE_CALLABLE(no_op_generator, no_op_callable); template -void template_no_op_generator(nvbench::state &state, - nvbench::type_list) +void template_no_op_generator(nvbench::state &state, nvbench::type_list) { - ASSERT(nvbench::type_strings::input_string() == - state.get_string("Integer")); - ASSERT(nvbench::type_strings::input_string() == - state.get_string("Float")); - ASSERT(nvbench::type_strings::input_string() == - state.get_string("Other")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("Integer")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("Float")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("Other")); // Enum params using non-templated version: no_op_generator(state); } -NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, - template_no_op_callable); +NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, template_no_op_callable); -using int_list = nvbench::type_list; +using int_list = + nvbench::type_list; using float_list = nvbench::type_list; using misc_list = nvbench::type_list; using lots_of_types_bench = - nvbench::benchmark>; + nvbench::benchmark>; using no_types_bench = nvbench::benchmark; @@ -110,8 +101,7 @@ void test_type_axes() fmt::format_to(std::back_inserter(buffer), " - {}{}\n", input_string, - description.empty() ? "" - : fmt::format(" ({})", description)); + description.empty() ? "" : fmt::format(" ({})", description)); } } @@ -300,9 +290,7 @@ void test_get_config_count() auto const num_devices = bench.get_devices().size(); - ASSERT_MSG(bench.get_config_count() == 72 * num_devices, - "Got {}", - bench.get_config_count()); + ASSERT_MSG(bench.get_config_count() == 72 * num_devices, "Got {}", bench.get_config_count()); } int main() diff --git a/testing/cpu_timer.cu b/testing/cpu_timer.cu index 4b58a95..99e7517 100644 --- a/testing/cpu_timer.cu +++ b/testing/cpu_timer.cu @@ -18,11 +18,11 @@ #include -#include "test_asserts.cuh" - #include #include +#include "test_asserts.cuh" + void test_basic() { using namespace std::literals::chrono_literals; diff --git a/testing/create.cu b/testing/create.cu index 6ed7fff..7ca8e26 100644 --- a/testing/create.cu +++ b/testing/create.cu @@ -16,17 +16,14 @@ * limitations under the License. */ -#include - #include #include +#include #include #include #include #include -#include "test_asserts.cuh" - #include #include @@ -34,6 +31,8 @@ #include #include +#include "test_asserts.cuh" + template std::vector sort(std::vector &&vec) { @@ -72,15 +71,11 @@ using misc_types = nvbench::type_list; using type_axes = nvbench::type_list; template -void template_no_op_generator(nvbench::state &state, - nvbench::type_list) +void template_no_op_generator(nvbench::state &state, nvbench::type_list) { - ASSERT(nvbench::type_strings::input_string() == - state.get_string("FloatT")); - ASSERT(nvbench::type_strings::input_string() == - state.get_string("IntT")); - ASSERT(nvbench::type_strings::input_string() == - state.get_string("IntT")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("FloatT")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("IntT")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("IntT")); // Enum params using non-templated version: no_op_generator(state); @@ -116,8 +111,7 @@ std::string run_and_get_state_string(nvbench::benchmark_base &bench, void validate_default_name() { - auto bench = - nvbench::benchmark_manager::get().get_benchmark("no_op_generator").clone(); + auto bench = nvbench::benchmark_manager::get().get_benchmark("no_op_generator").clone(); const std::string ref = "Params:\n"; @@ -127,8 +121,7 @@ void validate_default_name() void validate_custom_name() { - auto bench = - nvbench::benchmark_manager::get().get_benchmark("Custom Name").clone(); + auto bench = nvbench::benchmark_manager::get().get_benchmark("Custom Name").clone(); const std::string ref = "Params:\n"; @@ -138,8 +131,7 @@ void validate_custom_name() void validate_no_types() { - auto bench = - nvbench::benchmark_manager::get().get_benchmark("No Types").clone(); + auto bench = nvbench::benchmark_manager::get().get_benchmark("No Types").clone(); const std::string ref = R"expected(Params: Float: 11 Int: 1 String: One Params: Float: 11 Int: 2 String: One @@ -176,8 +168,7 @@ Params: Float: 13 Int: 3 String: Three void validate_only_types() { - auto bench = - nvbench::benchmark_manager::get().get_benchmark("Oops, All Types!").clone(); + auto bench = nvbench::benchmark_manager::get().get_benchmark("Oops, All Types!").clone(); const std::string ref = R"expected(Params: FloatT: F32 IntT: I32 MiscT: bool Params: FloatT: F32 IntT: I32 MiscT: void @@ -195,8 +186,7 @@ Params: FloatT: F64 IntT: I64 MiscT: void void validate_all_axes() { - auto bench = - nvbench::benchmark_manager::get().get_benchmark("All The Axes").clone(); + auto bench = nvbench::benchmark_manager::get().get_benchmark("All The Axes").clone(); const std::string ref = R"expected(Params: Float: 11 FloatT: F32 Int: 1 IntT: I32 MiscT: bool String: One diff --git a/testing/criterion_manager.cu b/testing/criterion_manager.cu index da0ddb0..b466c30 100644 --- a/testing/criterion_manager.cu +++ b/testing/criterion_manager.cu @@ -42,27 +42,34 @@ protected: void test_no_duplicates_are_allowed() { - nvbench::criterion_manager& manager = nvbench::criterion_manager::get(); - bool exception_triggered = false; + nvbench::criterion_manager &manager = nvbench::criterion_manager::get(); + bool exception_triggered = false; - try { - [[maybe_unused]] nvbench::stopping_criterion_base& _ = manager.get_criterion("custom"); - } catch(...) { + try + { + [[maybe_unused]] nvbench::stopping_criterion_base &_ = manager.get_criterion("custom"); + } + catch (...) + { exception_triggered = true; } ASSERT(exception_triggered); std::unique_ptr custom_ptr = std::make_unique(); - custom_criterion* custom_raw = custom_ptr.get(); + custom_criterion *custom_raw = custom_ptr.get(); ASSERT(&manager.add(std::move(custom_ptr)) == custom_raw); - nvbench::stopping_criterion_base& custom = nvbench::criterion_manager::get().get_criterion("custom"); + nvbench::stopping_criterion_base &custom = + nvbench::criterion_manager::get().get_criterion("custom"); ASSERT(custom_raw == &custom); exception_triggered = false; - try { + try + { manager.add(std::make_unique()); - } catch(...) { + } + catch (...) + { exception_triggered = true; } ASSERT(exception_triggered); diff --git a/testing/criterion_params.cu b/testing/criterion_params.cu index 4eceefa..92e2099 100644 --- a/testing/criterion_params.cu +++ b/testing/criterion_params.cu @@ -60,4 +60,3 @@ int main() test_compat_overwrite(); test_overwrite(); } - diff --git a/testing/cuda_timer.cu b/testing/cuda_timer.cu index d8e4a42..d610db5 100644 --- a/testing/cuda_timer.cu +++ b/testing/cuda_timer.cu @@ -16,19 +16,16 @@ * limitations under the License. */ -#include - #include +#include #include #include -#include "test_asserts.cuh" - #include -void test_basic(cudaStream_t time_stream, - cudaStream_t exec_stream, - bool expected) +#include "test_asserts.cuh" + +void test_basic(cudaStream_t time_stream, cudaStream_t exec_stream, bool expected) { nvbench::cuda_timer timer; diff --git a/testing/custom_main_custom_args.cu b/testing/custom_main_custom_args.cu index f7e331e..eb43a02 100644 --- a/testing/custom_main_custom_args.cu +++ b/testing/custom_main_custom_args.cu @@ -16,8 +16,8 @@ * limitations under the License. */ +#include #include -#include "nvbench/cuda_call.cuh" /****************************************************************************** * Install custom parser. @@ -35,7 +35,7 @@ // User code to handle a specific argument: void handle_my_custom_arg(); -// NVBench hook for modiifying the command line arguments before parsing: +// NVBench hook for modifying the command line arguments before parsing: void custom_arg_handler(std::vector &args) { // Handle and remove "--my-custom-arg" diff --git a/testing/custom_main_global_state_raii.cu b/testing/custom_main_global_state_raii.cu index e3584ab..8529178 100644 --- a/testing/custom_main_global_state_raii.cu +++ b/testing/custom_main_global_state_raii.cu @@ -19,8 +19,8 @@ #include #include -#include #include +#include /****************************************************************************** * Test having global state that is initialized and finalized via RAII. diff --git a/testing/device/noisy_bench.cu b/testing/device/noisy_bench.cu index 8cca3a1..62aaab0 100644 --- a/testing/device/noisy_bench.cu +++ b/testing/device/noisy_bench.cu @@ -29,12 +29,10 @@ void noisy_bench(nvbench::state &state) { // time, convert ms -> s - const auto mean = static_cast(state.get_float64("Mean")) / - 1000.f; + const auto mean = static_cast(state.get_float64("Mean")) / 1000.f; // rel stdev - const auto noise_pct = - static_cast(state.get_float64("Noise")); - const auto noise = noise_pct / 100.f; + const auto noise_pct = static_cast(state.get_float64("Noise")); + const auto noise = noise_pct / 100.f; // abs stdev const auto stdev = noise * mean; @@ -53,8 +51,7 @@ void noisy_bench(nvbench::state &state) try { return static_cast( - state.get_summary("nv/cold/time/gpu/stdev/relative") - .get_float64("value")); + state.get_summary("nv/cold/time/gpu/stdev/relative").get_float64("value")); } catch (std::invalid_argument &) { diff --git a/testing/entropy_criterion.cu b/testing/entropy_criterion.cu index df489c9..795e58f 100644 --- a/testing/entropy_criterion.cu +++ b/testing/entropy_criterion.cu @@ -20,11 +20,11 @@ #include #include -#include "test_asserts.cuh" - -#include -#include #include +#include +#include + +#include "test_asserts.cuh" void test_const() { @@ -32,7 +32,7 @@ void test_const() nvbench::detail::entropy_criterion criterion; criterion.initialize(params); - for (int i = 0; i < 6; i++) + for (int i = 0; i < 6; i++) { // nvbench wants at least 5 to compute the standard deviation criterion.add_measurement(42.0); } @@ -48,7 +48,7 @@ void produce_entropy_arch(nvbench::detail::entropy_criterion &criterion) * 2.5, 2.4, 2.2, 2.1, 2.0, 1.9 <-+ * 1.8, 1.7, 1.6, 1.6, 1.5, 1.4 | * 1.4, 1.3, 1.3, 1.3, 1.2, 1.2 | - * 1.1, 1.1, 1.1, 1.0, 1.0, 1.0 +-- entropy only decreases after 5-th sample, + * 1.1, 1.1, 1.1, 1.0, 1.0, 1.0 +-- entropy only decreases after 5-th sample, * 1.0, 0.9, 0.9, 0.9, 0.9, 0.9 | so the slope should be negative * 0.8, 0.8, 0.8, 0.8, 0.8, 0.8 | * 0.7, 0.7, 0.7, 0.7, 0.7, 0.7 <-+ diff --git a/testing/enum_type_list.cu b/testing/enum_type_list.cu index 88535ba..50e3c68 100644 --- a/testing/enum_type_list.cu +++ b/testing/enum_type_list.cu @@ -18,12 +18,12 @@ #include -#include "test_asserts.cuh" - #include #include +#include "test_asserts.cuh" + // If using gcc version < 7, disable some tests to WAR a compiler bug. See NVIDIA/nvbench#39. #if defined(__GNUC__) && __GNUC__ == 7 #define USING_GCC_7 @@ -102,8 +102,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS( void test_int() { ASSERT((std::is_same_v, nvbench::type_list<>>)); - ASSERT((std::is_same_v, - nvbench::type_list>>)); + ASSERT((std::is_same_v, nvbench::type_list>>)); ASSERT((std::is_same_v, nvbench::type_list, nvbench::enum_type<1>, @@ -115,42 +114,35 @@ void test_int() void test_scoped_enum() { #ifndef USING_GCC_7 - ASSERT(( - std::is_same_v, - nvbench::type_list>>)); + ASSERT((std::is_same_v, + nvbench::type_list>>)); #endif - ASSERT(( - std::is_same_v, - nvbench::type_list, - nvbench::enum_type, - nvbench::enum_type>>)); + ASSERT((std::is_same_v< + nvbench::enum_type_list, + nvbench::type_list, + nvbench::enum_type, + nvbench::enum_type>>)); } void test_unscoped_enum() { #ifndef USING_GCC_7 - ASSERT( - (std::is_same_v, - nvbench::type_list>>)); - ASSERT( - (std::is_same_v< - nvbench::enum_type_list, - nvbench::type_list, - nvbench::enum_type, - nvbench::enum_type>>)); + ASSERT((std::is_same_v, + nvbench::type_list>>)); + ASSERT((std::is_same_v, + nvbench::type_list, + nvbench::enum_type, + nvbench::enum_type>>)); #endif } void test_scoped_enum_type_strings() { - using values = nvbench::enum_type_list; - using val_1 = nvbench::tl::get<0, values>; - using val_2 = nvbench::tl::get<1, values>; - using val_3 = nvbench::tl::get<2, values>; + using values = + nvbench::enum_type_list; + using val_1 = nvbench::tl::get<0, values>; + using val_2 = nvbench::tl::get<1, values>; + using val_3 = nvbench::tl::get<2, values>; ASSERT((nvbench::type_strings::input_string() == "1")); ASSERT((nvbench::type_strings::description() == "scoped_enum::val_1")); ASSERT((nvbench::type_strings::input_string() == "2")); diff --git a/testing/float64_axis.cu b/testing/float64_axis.cu index 8d1ea02..5509014 100644 --- a/testing/float64_axis.cu +++ b/testing/float64_axis.cu @@ -34,8 +34,7 @@ void test_empty() const auto clone_base = axis.clone(); ASSERT(clone_base.get() != nullptr); - const auto *clone = - dynamic_cast(clone_base.get()); + const auto *clone = dynamic_cast(clone_base.get()); ASSERT(clone != nullptr); ASSERT(clone->get_name() == "Empty"); @@ -62,8 +61,7 @@ void test_basic() const auto clone_base = axis.clone(); ASSERT(clone_base.get() != nullptr); - const auto *clone = - dynamic_cast(clone_base.get()); + const auto *clone = dynamic_cast(clone_base.get()); ASSERT(clone != nullptr); ASSERT(clone->get_name() == "Basic"); diff --git a/testing/int64_axis.cu b/testing/int64_axis.cu index 84d7dd1..4684a0b 100644 --- a/testing/int64_axis.cu +++ b/testing/int64_axis.cu @@ -18,10 +18,10 @@ #include -#include "test_asserts.cuh" - #include +#include "test_asserts.cuh" + void test_empty() { nvbench::int64_axis axis("Empty"); @@ -36,8 +36,7 @@ void test_empty() const auto clone_base = axis.clone(); ASSERT(clone_base.get() != nullptr); - const auto *clone = - dynamic_cast(clone_base.get()); + const auto *clone = dynamic_cast(clone_base.get()); ASSERT(clone != nullptr); ASSERT(clone->get_name() == "Empty"); @@ -66,8 +65,7 @@ void test_basic() const auto clone_base = axis.clone(); ASSERT(clone_base.get() != nullptr); - const auto *clone = - dynamic_cast(clone_base.get()); + const auto *clone = dynamic_cast(clone_base.get()); ASSERT(clone != nullptr); ASSERT(clone->get_name() == "BasicAxis"); @@ -87,8 +85,7 @@ void test_basic() void test_power_of_two() { nvbench::int64_axis axis{"POTAxis"}; - axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, - nvbench::int64_axis_flags::power_of_two); + axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two); const std::vector ref_inputs{0, 1, 2, 3, 7, 6, 5, 4}; const std::vector ref_values{1, 2, 4, 8, 128, 64, 32, 16}; @@ -102,14 +99,12 @@ void test_power_of_two() for (size_t i = 0; i < 8; ++i) { ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i])); - ASSERT(axis.get_description(i) == - fmt::format("2^{} = {}", ref_inputs[i], ref_values[i])); + ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i])); } const auto clone_base = axis.clone(); ASSERT(clone_base.get() != nullptr); - const auto *clone = - dynamic_cast(clone_base.get()); + const auto *clone = dynamic_cast(clone_base.get()); ASSERT(clone != nullptr); ASSERT(clone->get_name() == "POTAxis"); @@ -122,8 +117,7 @@ void test_power_of_two() for (size_t i = 0; i < 8; ++i) { ASSERT(clone->get_input_string(i) == fmt::to_string(ref_inputs[i])); - ASSERT(clone->get_description(i) == - fmt::format("2^{} = {}", ref_inputs[i], ref_values[i])); + ASSERT(clone->get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i])); } } @@ -250,8 +244,7 @@ void test_update_none_to_pow2() void test_update_pow2_to_none() { nvbench::int64_axis axis{"TestAxis"}; - axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, - nvbench::int64_axis_flags::power_of_two); + axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two); const std::vector ref_inputs{0, 1, 2, 3, 7, 6, 5, 4}; const std::vector ref_values{1, 2, 4, 8, 128, 64, 32, 16}; @@ -304,8 +297,7 @@ void test_update_pow2_to_none() for (size_t i = 0; i < 8; ++i) { ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i])); - ASSERT(axis.get_description(i) == - fmt::format("2^{} = {}", ref_inputs[i], ref_values[i])); + ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i])); } } @@ -313,8 +305,7 @@ void test_update_pow2_to_pow2() { nvbench::int64_axis axis{"TestAxis"}; - axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, - nvbench::int64_axis_flags::power_of_two); + axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two); const std::vector ref_inputs{0, 1, 2, 3, 7, 6, 5, 4}; const std::vector ref_values{1, 2, 4, 8, 128, 64, 32, 16}; @@ -369,8 +360,7 @@ void test_update_pow2_to_pow2() for (size_t i = 0; i < 8; ++i) { ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i])); - ASSERT(axis.get_description(i) == - fmt::format("2^{} = {}", ref_inputs[i], ref_values[i])); + ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i])); } } diff --git a/testing/named_values.cu b/testing/named_values.cu index 8407235..8a9846f 100644 --- a/testing/named_values.cu +++ b/testing/named_values.cu @@ -18,10 +18,10 @@ #include -#include "test_asserts.cuh" - #include +#include "test_asserts.cuh" + void test_empty() { nvbench::named_values vals; diff --git a/testing/option_parser.cu b/testing/option_parser.cu index 167e833..4b01a9a 100644 --- a/testing/option_parser.cu +++ b/testing/option_parser.cu @@ -16,15 +16,14 @@ * limitations under the License. */ -#include - #include +#include #include -#include "test_asserts.cuh" - #include +#include "test_asserts.cuh" + //============================================================================== // Declare a couple benchmarks for testing: void DummyBench(nvbench::state &state) { state.skip("Skipping for testing."); } @@ -50,8 +49,7 @@ NVBENCH_BENCH_TYPES(TestBench, NVBENCH_TYPE_AXES(Ts, Us)) namespace { -[[nodiscard]] std::string -states_to_string(const std::vector &states) +[[nodiscard]] std::string states_to_string(const std::vector &states) { fmt::memory_buffer buffer; std::string table_format = "| {:^5} | {:^10} | {:^4} | {:^4} | {:^4} " @@ -88,7 +86,7 @@ states_to_string(const std::vector &states) // Expects the parser to have a single TestBench benchmark. Runs the benchmark // and returns the resulting states. -[[nodiscard]] const auto& parser_to_states(nvbench::option_parser &parser) +[[nodiscard]] const auto &parser_to_states(nvbench::option_parser &parser) { const auto &benches = parser.get_benchmarks(); ASSERT(benches.size() == 1); @@ -267,8 +265,7 @@ void test_int64_axis_single() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 2 : 1 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 2 : 1 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -308,8 +305,7 @@ void test_int64_axis_multi() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 , 7 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 , 7 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -323,8 +319,7 @@ void test_int64_axis_multi() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 7 : 5 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 7 : 5 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -369,8 +364,7 @@ void test_int64_axis_pow2_single() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -384,8 +378,7 @@ void test_int64_axis_pow2_single() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 : 7 : 1 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 : 7 : 1 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -425,8 +418,7 @@ void test_int64_axis_pow2_multi() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 , 7 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 , 7 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -440,8 +432,7 @@ void test_int64_axis_pow2_multi() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 : 7 : 5 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 : 7 : 5 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -486,8 +477,7 @@ void test_int64_axis_none_to_pow2_single() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -501,8 +491,7 @@ void test_int64_axis_none_to_pow2_single() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 : 7 : 1 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 : 7 : 1 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -542,8 +531,7 @@ void test_int64_axis_none_to_pow2_multi() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 , 7 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 , 7 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -557,8 +545,7 @@ void test_int64_axis_none_to_pow2_multi() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 : 7 : 5 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 : 7 : 5 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -617,8 +604,7 @@ void test_int64_axis_pow2_to_none_single() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 2 : 1 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 2 : 1 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -658,8 +644,7 @@ void test_int64_axis_pow2_to_none_multi() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 , 7 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 , 7 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -673,8 +658,7 @@ void test_int64_axis_pow2_to_none_multi() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 7 : 5 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 7 : 5 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -719,8 +703,7 @@ void test_float64_axis_single() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -734,10 +717,7 @@ void test_float64_axis_single() { nvbench::option_parser parser; - parser.parse({"--benchmark", - "TestBench", - "--axis", - " Floats [ ] = [ 3.5 : 3.6 : 1 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 : 3.6 : 1 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -777,8 +757,7 @@ void test_float64_axis_multi() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 , 4.1 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 , 4.1 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -792,18 +771,14 @@ void test_float64_axis_multi() { nvbench::option_parser parser; - parser.parse({"--benchmark", - "TestBench", - "--axis", - " Floats [ ] = [ 3.5 : 4.2 : 0.6 ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 : 4.2 : 0.6 ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", "Floats=[3.5:4.2:0.6]"}); + parser.parse({"--benchmark", "TestBench", "--axis", "Floats=[3.5:4.2:0.6]"}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -827,8 +802,7 @@ void test_string_axis_single() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Strings [ ] = fo br "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = fo br "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -842,8 +816,7 @@ void test_string_axis_single() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -883,8 +856,7 @@ void test_string_axis_multi() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br , baz ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br , baz ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -951,8 +923,7 @@ void test_type_axis_multi() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "TestBench", "--axis", " T [ ] = [ U8, void ] "}); + parser.parse({"--benchmark", "TestBench", "--axis", " T [ ] = [ U8, void ] "}); const auto test = parser_to_state_string(parser); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } @@ -1177,9 +1148,8 @@ void test_axis_before_benchmark() void test_min_samples() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "DummyBench", "--min-samples", "12345"}); - const auto& states = parser_to_states(parser); + parser.parse({"--benchmark", "DummyBench", "--min-samples", "12345"}); + const auto &states = parser_to_states(parser); ASSERT(states.size() == 1); ASSERT(states[0].get_min_samples() == 12345); @@ -1188,9 +1158,8 @@ void test_min_samples() void test_min_time() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "DummyBench", "--min-time", "12345e2"}); - const auto& states = parser_to_states(parser); + parser.parse({"--benchmark", "DummyBench", "--min-time", "12345e2"}); + const auto &states = parser_to_states(parser); ASSERT(states.size() == 1); ASSERT(std::abs(states[0].get_min_time() - 12345e2) < 1.); @@ -1199,9 +1168,8 @@ void test_min_time() void test_max_noise() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "DummyBench", "--max-noise", "50.3"}); - const auto& states = parser_to_states(parser); + parser.parse({"--benchmark", "DummyBench", "--max-noise", "50.3"}); + const auto &states = parser_to_states(parser); ASSERT(states.size() == 1); ASSERT(std::abs(states[0].get_max_noise() - 0.503) < 1.e-4); @@ -1210,9 +1178,8 @@ void test_max_noise() void test_skip_time() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "DummyBench", "--skip-time", "12345e2"}); - const auto& states = parser_to_states(parser); + parser.parse({"--benchmark", "DummyBench", "--skip-time", "12345e2"}); + const auto &states = parser_to_states(parser); ASSERT(states.size() == 1); ASSERT(std::abs(states[0].get_skip_time() - 12345e2) < 1.); @@ -1221,9 +1188,8 @@ void test_skip_time() void test_timeout() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "DummyBench", "--timeout", "12345e2"}); - const auto& states = parser_to_states(parser); + parser.parse({"--benchmark", "DummyBench", "--timeout", "12345e2"}); + const auto &states = parser_to_states(parser); ASSERT(states.size() == 1); ASSERT(std::abs(states[0].get_timeout() - 12345e2) < 1.); @@ -1232,12 +1198,15 @@ void test_timeout() void test_stopping_criterion() { nvbench::option_parser parser; - parser.parse( - {"--benchmark", "DummyBench", - "--stopping-criterion", "entropy", - "--max-angle", "0.42", - "--min-r2", "0.6"}); - const auto& states = parser_to_states(parser); + parser.parse({"--benchmark", + "DummyBench", + "--stopping-criterion", + "entropy", + "--max-angle", + "0.42", + "--min-r2", + "0.6"}); + const auto &states = parser_to_states(parser); ASSERT(states.size() == 1); ASSERT(states[0].get_stopping_criterion() == "entropy"); diff --git a/testing/range.cu b/testing/range.cu index 53226cd..95c8765 100644 --- a/testing/range.cu +++ b/testing/range.cu @@ -22,12 +22,9 @@ void test_basic() { - ASSERT((nvbench::range(0, 6) == - std::vector{0, 1, 2, 3, 4, 5, 6})); - ASSERT((nvbench::range(0, 6, 1) == - std::vector{0, 1, 2, 3, 4, 5, 6})); - ASSERT( - (nvbench::range(0, 6, 2) == std::vector{0, 2, 4, 6})); + ASSERT((nvbench::range(0, 6) == std::vector{0, 1, 2, 3, 4, 5, 6})); + ASSERT((nvbench::range(0, 6, 1) == std::vector{0, 1, 2, 3, 4, 5, 6})); + ASSERT((nvbench::range(0, 6, 2) == std::vector{0, 2, 4, 6})); ASSERT((nvbench::range(0, 6, 3) == std::vector{0, 3, 6})); ASSERT((nvbench::range(0, 6, 4) == std::vector{0, 4})); ASSERT((nvbench::range(0, 6, 5) == std::vector{0, 5})); @@ -37,26 +34,19 @@ void test_basic() void test_result_type() { // All ints should turn into int64 by default: - ASSERT((std::is_same_v>)); - ASSERT((std::is_same_v>)); - ASSERT((std::is_same_v>)); + ASSERT((std::is_same_v>)); + ASSERT((std::is_same_v>)); + ASSERT((std::is_same_v>)); // All floats should turn into float64 by default: - ASSERT((std::is_same_v>)); - ASSERT((std::is_same_v>)); + ASSERT((std::is_same_v>)); + ASSERT((std::is_same_v>)); // Other types may be explicitly specified: - ASSERT((std::is_same_v(0.f, 1.f)), + ASSERT((std::is_same_v(0.f, 1.f)), std::vector>)); - ASSERT((std::is_same_v< - decltype(nvbench::range(0, 1)), - std::vector>)); + ASSERT((std::is_same_v(0, 1)), + std::vector>)); } void test_fp_tolerance() @@ -68,10 +58,8 @@ void test_fp_tolerance() const nvbench::float32_t stride = 1e-4f; for (std::size_t size = 1; size < 1024; ++size) { - const nvbench::float32_t end = - start + stride * static_cast(size - 1); - ASSERT_MSG(nvbench::range(start, end, stride).size() == size, - "size={}", size); + const nvbench::float32_t end = start + stride * static_cast(size - 1); + ASSERT_MSG(nvbench::range(start, end, stride).size() == size, "size={}", size); } } diff --git a/testing/reset_error.cu b/testing/reset_error.cu index 8fece93..9c20e30 100644 --- a/testing/reset_error.cu +++ b/testing/reset_error.cu @@ -2,18 +2,17 @@ #include "test_asserts.cuh" - namespace { - __global__ void multiply5(const int32_t* __restrict__ a, int32_t* __restrict__ b) - { - const auto id = blockIdx.x * blockDim.x + threadIdx.x; - b[id] = 5 * a[id]; - } +__global__ void multiply5(const int32_t *__restrict__ a, int32_t *__restrict__ b) +{ + const auto id = blockIdx.x * blockDim.x + threadIdx.x; + b[id] = 5 * a[id]; } +} // namespace int main() -{ +{ multiply5<<<256, 256>>>(nullptr, nullptr); try diff --git a/testing/ring_buffer.cu b/testing/ring_buffer.cu index 5af5343..35199a6 100644 --- a/testing/ring_buffer.cu +++ b/testing/ring_buffer.cu @@ -18,14 +18,13 @@ #include -#include "test_asserts.cuh" - #include #include +#include "test_asserts.cuh" + template -bool equal(const nvbench::detail::ring_buffer &buffer, - const std::vector &reference) +bool equal(const nvbench::detail::ring_buffer &buffer, const std::vector &reference) { return std::equal(buffer.begin(), buffer.end(), reference.begin()); } diff --git a/testing/runner.cu b/testing/runner.cu index 6335d27..4d3dfc9 100644 --- a/testing/runner.cu +++ b/testing/runner.cu @@ -16,23 +16,22 @@ * limitations under the License. */ -#include - #include #include +#include #include #include #include #include -#include "test_asserts.cuh" - #include #include #include #include +#include "test_asserts.cuh" + template std::vector sort(std::vector &&vec) { @@ -65,21 +64,16 @@ using misc_types = nvbench::type_list; using type_axes = nvbench::type_list; template -void template_no_op_generator(nvbench::state &state, - nvbench::type_list) +void template_no_op_generator(nvbench::state &state, nvbench::type_list) { - ASSERT(nvbench::type_strings::input_string() == - state.get_string("FloatT")); - ASSERT(nvbench::type_strings::input_string() == - state.get_string("IntT")); - ASSERT(nvbench::type_strings::input_string() == - state.get_string("IntT")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("FloatT")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("IntT")); + ASSERT(nvbench::type_strings::input_string() == state.get_string("IntT")); // Enum params using non-templated version: no_op_generator(state); } -NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, - template_no_op_callable); +NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, template_no_op_callable); void test_empty() { diff --git a/testing/state.cu b/testing/state.cu index a2300d1..de7848b 100644 --- a/testing/state.cu +++ b/testing/state.cu @@ -16,10 +16,9 @@ * limitations under the License. */ -#include - #include #include +#include #include #include @@ -43,8 +42,7 @@ struct state_tester : public nvbench::state void set_param(std::string name, T &&value) { this->state::m_axis_values.set_value(std::move(name), - nvbench::named_values::value_type{ - std::forward(value)}); + nvbench::named_values::value_type{std::forward(value)}); } }; } // namespace nvbench::detail diff --git a/testing/state_generator.cu b/testing/state_generator.cu index f75be02..9042fef 100644 --- a/testing/state_generator.cu +++ b/testing/state_generator.cu @@ -16,17 +16,16 @@ * limitations under the License. */ -#include - #include #include #include #include - -#include "test_asserts.cuh" +#include #include +#include "test_asserts.cuh" + // Mock up a benchmark for testing: void dummy_generator(nvbench::state &) {} NVBENCH_DEFINE_CALLABLE(dummy_generator, dummy_callable); @@ -37,7 +36,7 @@ using ints = nvbench::type_list; using misc = nvbench::type_list; using type_axes = nvbench::type_list; template -void template_generator(nvbench::state &, nvbench::type_list){}; +void template_generator(nvbench::state &, nvbench::type_list) {}; NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_generator, template_callable); using template_bench = nvbench::benchmark; @@ -151,20 +150,16 @@ void test_create() bench.set_devices(std::vector{}); bench.add_float64_axis("Radians", {3.14, 6.28}); bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none); - bench.add_int64_axis("NumInputs", - {10, 15, 20}, - nvbench::int64_axis_flags::power_of_two); + bench.add_int64_axis("NumInputs", {10, 15, 20}, nvbench::int64_axis_flags::power_of_two); bench.add_string_axis("Strategy", {"Recursive", "Iterative"}); - const std::vector states = - nvbench::detail::state_generator::create(bench); + const std::vector states = nvbench::detail::state_generator::create(bench); // 2 (Radians) * 3 (VecSize) * 3 (NumInputs) * 2 (Strategy) = 36 ASSERT(states.size() == 36); fmt::memory_buffer buffer; - const std::string table_format = - "| {:^5} | {:^10} | {:^7} | {:^7} | {:^9} | {:^9} |\n"; + const std::string table_format = "| {:^5} | {:^10} | {:^7} | {:^7} | {:^9} | {:^9} |\n"; fmt::format_to(std::back_inserter(buffer), "\n"); fmt::format_to(std::back_inserter(buffer), @@ -241,13 +236,10 @@ void test_create_with_types() bench.set_type_axes_names({"Floats", "Ints", "Misc"}); bench.add_float64_axis("Radians", {3.14, 6.28}); bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none); - bench.add_int64_axis("NumInputs", - {10, 15, 20}, - nvbench::int64_axis_flags::power_of_two); + bench.add_int64_axis("NumInputs", {10, 15, 20}, nvbench::int64_axis_flags::power_of_two); bench.add_string_axis("Strategy", {"Recursive", "Iterative"}); - const std::vector states = - nvbench::detail::state_generator::create(bench); + const std::vector states = nvbench::detail::state_generator::create(bench); // - 2 (Floats) * 2 (Ints) * 2 (Misc) = 8 total type_configs // - 2 (Radians) * 3 (VecSize) * 3 (NumInputs) * 2 (Strategy) = 36 non_type @@ -591,17 +583,14 @@ void test_create_with_masked_types() bench.set_type_axes_names({"Floats", "Ints", "Misc"}); bench.add_float64_axis("Radians", {3.14, 6.28}); bench.add_int64_axis("VecSize", {2, 3, 4}, nvbench::int64_axis_flags::none); - bench.add_int64_axis("NumInputs", - {10, 15, 20}, - nvbench::int64_axis_flags::power_of_two); + bench.add_int64_axis("NumInputs", {10, 15, 20}, nvbench::int64_axis_flags::power_of_two); bench.add_string_axis("Strategy", {"Recursive", "Iterative"}); // Mask out some types: bench.get_axes().get_type_axis("Floats").set_active_inputs({"F32"}); bench.get_axes().get_type_axis("Ints").set_active_inputs({"I64"}); - const std::vector states = - nvbench::detail::state_generator::create(bench); + const std::vector states = nvbench::detail::state_generator::create(bench); fmt::memory_buffer buffer; std::string table_format = "| {:^5} | {:^10} | {:^6} | {:^4} | {:^4} | {:^7} " @@ -728,8 +717,7 @@ void test_devices() bench.add_string_axis("S", {"foo", "bar"}); bench.add_int64_axis("I", {2, 4}); - const std::vector states = - nvbench::detail::state_generator::create(bench); + const std::vector states = nvbench::detail::state_generator::create(bench); // 3 devices * 4 axis configs = 12 total states ASSERT(states.size() == 12); @@ -791,8 +779,7 @@ void test_termination_criteria() bench.set_skip_time(skip_time); bench.set_timeout(timeout); - const std::vector states = - nvbench::detail::state_generator::create(bench); + const std::vector states = nvbench::detail::state_generator::create(bench); ASSERT(states.size() == 1); ASSERT(min_samples == states[0].get_min_samples()); diff --git a/testing/statistics.cu b/testing/statistics.cu index a67a044..db4a40d 100644 --- a/testing/statistics.cu +++ b/testing/statistics.cu @@ -19,18 +19,18 @@ #include #include -#include "test_asserts.cuh" - #include #include +#include "test_asserts.cuh" + namespace statistics = nvbench::detail::statistics; void test_mean() { { std::vector data{1.0, 2.0, 3.0, 4.0, 5.0}; - const nvbench::float64_t actual = statistics::compute_mean(std::begin(data), std::end(data)); + const nvbench::float64_t actual = statistics::compute_mean(std::begin(data), std::end(data)); const nvbench::float64_t expected = 3.0; ASSERT(std::abs(actual - expected) < 0.001); } @@ -46,7 +46,8 @@ void test_std() { std::vector data{1.0, 2.0, 3.0, 4.0, 5.0}; const nvbench::float64_t mean = 3.0; - const nvbench::float64_t actual = statistics::standard_deviation(std::begin(data), std::end(data), mean); + const nvbench::float64_t actual = + statistics::standard_deviation(std::begin(data), std::end(data), mean); const nvbench::float64_t expected = 1.581; ASSERT(std::abs(actual - expected) < 0.001); } @@ -78,7 +79,8 @@ void test_r2() { std::vector ys{1.0, 2.0, 3.0, 4.0, 5.0}; auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys)); - const nvbench::float64_t actual = statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept); + const nvbench::float64_t actual = + statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept); const nvbench::float64_t expected = 1.0; ASSERT(std::abs(actual - expected) < 0.001); } @@ -95,7 +97,8 @@ void test_r2() auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys)); const nvbench::float64_t expected = 0.675; - const nvbench::float64_t actual = statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept); + const nvbench::float64_t actual = + statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept); ASSERT(std::abs(actual - expected) < 0.001); } } @@ -103,17 +106,17 @@ void test_r2() void test_slope_conversion() { { - const nvbench::float64_t actual = statistics::slope2deg(0.0); + const nvbench::float64_t actual = statistics::slope2deg(0.0); const nvbench::float64_t expected = 0.0; ASSERT(std::abs(actual - expected) < 0.001); } { - const nvbench::float64_t actual = statistics::slope2deg(1.0); + const nvbench::float64_t actual = statistics::slope2deg(1.0); const nvbench::float64_t expected = 45.0; ASSERT(std::abs(actual - expected) < 0.001); } { - const nvbench::float64_t actual = statistics::slope2deg(5.0); + const nvbench::float64_t actual = statistics::slope2deg(5.0); const nvbench::float64_t expected = 78.69; ASSERT(std::abs(actual - expected) < 0.001); } diff --git a/testing/stdrel_criterion.cu b/testing/stdrel_criterion.cu index b8bfc4e..fbe1dab 100644 --- a/testing/stdrel_criterion.cu +++ b/testing/stdrel_criterion.cu @@ -20,13 +20,13 @@ #include #include -#include "test_asserts.cuh" - #include #include #include #include +#include "test_asserts.cuh" + void test_const() { nvbench::criterion_params params; @@ -46,14 +46,14 @@ std::vector generate(double mean, double rel_std_dev, int size) std::mt19937 gen(seed++); std::vector v(static_cast(size)); std::normal_distribution dist(mean, mean * rel_std_dev); - std::generate(v.begin(), v.end(), [&]{ return dist(gen); }); + std::generate(v.begin(), v.end(), [&] { return dist(gen); }); return v; } void test_stdrel() { - const nvbench::int64_t size = 10; - const nvbench::float64_t mean = 42.0; + const nvbench::int64_t size = 10; + const nvbench::float64_t mean = 42.0; const nvbench::float64_t max_noise = 0.1; nvbench::criterion_params params; @@ -62,7 +62,7 @@ void test_stdrel() nvbench::detail::stdrel_criterion criterion; criterion.initialize(params); - for (nvbench::float64_t measurement: generate(mean, max_noise / 2, size)) + for (nvbench::float64_t measurement : generate(mean, max_noise / 2, size)) { criterion.add_measurement(measurement); } @@ -71,7 +71,7 @@ void test_stdrel() params.set_float64("max-noise", max_noise); criterion.initialize(params); - for (nvbench::float64_t measurement: generate(mean, max_noise * 2, size)) + for (nvbench::float64_t measurement : generate(mean, max_noise * 2, size)) { criterion.add_measurement(measurement); } diff --git a/testing/string_axis.cu b/testing/string_axis.cu index 1fb3d6f..80bf9c8 100644 --- a/testing/string_axis.cu +++ b/testing/string_axis.cu @@ -32,8 +32,7 @@ void test_empty() const auto clone_base = axis.clone(); ASSERT(clone_base.get() != nullptr); - const auto *clone = - dynamic_cast(clone_base.get()); + const auto *clone = dynamic_cast(clone_base.get()); ASSERT(clone != nullptr); ASSERT(clone->get_name() == "Empty"); @@ -61,8 +60,7 @@ void test_basic() const auto clone_base = axis.clone(); ASSERT(clone_base.get() != nullptr); - const auto *clone = - dynamic_cast(clone_base.get()); + const auto *clone = dynamic_cast(clone_base.get()); ASSERT(clone != nullptr); ASSERT(clone->get_name() == "Basic"); diff --git a/testing/test_asserts.cuh b/testing/test_asserts.cuh index 58be9a1..704cd19 100644 --- a/testing/test_asserts.cuh +++ b/testing/test_asserts.cuh @@ -23,55 +23,54 @@ #include #include -#define ASSERT(cond) \ - do \ - { \ - if (cond) \ - {} \ - else \ - { \ - fmt::print("{}:{}: Assertion failed ({}).\n", __FILE__, __LINE__, #cond); \ - std::fflush(stdout); \ - throw std::runtime_error("Unit test failure."); \ - } \ +#define ASSERT(cond) \ + do \ + { \ + if (cond) \ + { \ + } \ + else \ + { \ + fmt::print("{}:{}: Assertion failed ({}).\n", __FILE__, __LINE__, #cond); \ + std::fflush(stdout); \ + throw std::runtime_error("Unit test failure."); \ + } \ } while (false) -#define ASSERT_MSG(cond, fmtstr, ...) \ - do \ - { \ - if (cond) \ - {} \ - else \ - { \ - fmt::print("{}:{}: Test assertion failed ({}) {}\n", \ - __FILE__, \ - __LINE__, \ - #cond, \ - fmt::format(fmtstr, __VA_ARGS__)); \ - std::fflush(stdout); \ - throw std::runtime_error("Unit test failure."); \ - } \ +#define ASSERT_MSG(cond, fmtstr, ...) \ + do \ + { \ + if (cond) \ + { \ + } \ + else \ + { \ + fmt::print("{}:{}: Test assertion failed ({}) {}\n", \ + __FILE__, \ + __LINE__, \ + #cond, \ + fmt::format(fmtstr, __VA_ARGS__)); \ + std::fflush(stdout); \ + throw std::runtime_error("Unit test failure."); \ + } \ } while (false) -#define ASSERT_THROWS_ANY(expr) \ - do \ - { \ - bool threw = false; \ - try \ - { \ - expr; \ - } \ - catch (...) \ - { \ - threw = true; \ - } \ - if (!threw) \ - { \ - fmt::print("{}:{}: Expression expected exception: '{}'.", \ - __FILE__, \ - __LINE__, \ - #expr); \ - std::fflush(stdout); \ - throw std::runtime_error("Unit test failure."); \ - } \ +#define ASSERT_THROWS_ANY(expr) \ + do \ + { \ + bool threw = false; \ + try \ + { \ + expr; \ + } \ + catch (...) \ + { \ + threw = true; \ + } \ + if (!threw) \ + { \ + fmt::print("{}:{}: Expression expected exception: '{}'.", __FILE__, __LINE__, #expr); \ + std::fflush(stdout); \ + throw std::runtime_error("Unit test failure."); \ + } \ } while (false) diff --git a/testing/type_axis.cu b/testing/type_axis.cu index 3cfff13..17c5133 100644 --- a/testing/type_axis.cu +++ b/testing/type_axis.cu @@ -17,13 +17,12 @@ */ #include - #include -#include "test_asserts.cuh" - #include +#include "test_asserts.cuh" + void test_empty() { nvbench::type_axis axis("Basic", 0); @@ -39,8 +38,7 @@ void test_empty() const auto clone_base = axis.clone(); ASSERT(clone_base.get() != nullptr); - const auto *clone = - dynamic_cast(clone_base.get()); + const auto *clone = dynamic_cast(clone_base.get()); ASSERT(clone != nullptr); ASSERT(clone->get_name() == "Basic"); @@ -63,8 +61,7 @@ void test_single() auto clone_base = axis.clone(); ASSERT(clone_base.get() != nullptr); - auto *clone = - dynamic_cast(clone_base.get()); + auto *clone = dynamic_cast(clone_base.get()); ASSERT(clone != nullptr); ASSERT(clone->get_name() == "Single"); @@ -102,8 +99,7 @@ void test_single() void test_several() { nvbench::type_axis axis("Several", 0); - axis.set_inputs< - nvbench::type_list>(); + axis.set_inputs>(); ASSERT(axis.get_name() == "Several"); ASSERT(axis.get_size() == 3); @@ -122,8 +118,7 @@ void test_several() auto clone_base = axis.clone(); ASSERT(clone_base.get() != nullptr); - auto *clone = - dynamic_cast(clone_base.get()); + auto *clone = dynamic_cast(clone_base.get()); ASSERT(clone != nullptr); ASSERT(clone->get_name() == "Several"); @@ -177,9 +172,8 @@ void test_several() void test_get_type_index() { nvbench::type_axis axis("GetIndexTest", 0); - axis.set_inputs< - nvbench:: - type_list>(); + axis + .set_inputs>(); ASSERT(axis.get_type_index("I8") == 0); ASSERT(axis.get_type_index("U16") == 1); @@ -188,8 +182,7 @@ void test_get_type_index() const auto clone_base = axis.clone(); ASSERT(clone_base.get() != nullptr); - const auto *clone = - dynamic_cast(clone_base.get()); + const auto *clone = dynamic_cast(clone_base.get()); ASSERT(clone != nullptr); ASSERT(clone->get_type_index("I8") == 0); diff --git a/testing/type_list.cu b/testing/type_list.cu index 2e64e6d..99b1ee2 100644 --- a/testing/type_list.cu +++ b/testing/type_list.cu @@ -17,11 +17,8 @@ */ #include - #include -#include "test_asserts.cuh" - #include #include @@ -30,6 +27,8 @@ #include #include +#include "test_asserts.cuh" + // Unique, numbered types for testing type_list functionality. using T0 = std::integral_constant; using T1 = std::integral_constant; @@ -80,14 +79,13 @@ struct test_concat struct empty_tests { - static_assert( - std::is_same_v, TLEmpty>); + static_assert(std::is_same_v, TLEmpty>); static_assert(std::is_same_v, TL012>); static_assert(std::is_same_v, TL012>); }; - static_assert(std::is_same_v, - nvbench::type_list>); + static_assert( + std::is_same_v, nvbench::type_list>); }; struct test_prepend_each @@ -97,8 +95,7 @@ struct test_prepend_each using T23 = nvbench::type_list; using TLs = nvbench::type_list; - using Expected = nvbench::type_list, - nvbench::type_list>; + using Expected = nvbench::type_list, nvbench::type_list>; static_assert(std::is_same_v, Expected>); }; @@ -110,16 +107,12 @@ struct test_empty_cartesian_product struct test_single_cartesian_product { - using prod_1 = - nvbench::tl::cartesian_product>>; - static_assert( - std::is_same_v>>); + using prod_1 = nvbench::tl::cartesian_product>>; + static_assert(std::is_same_v>>); - using prod_2 = nvbench::tl::cartesian_product< - nvbench::type_list>>; - static_assert(std::is_same_v, - nvbench::type_list>>); + using prod_2 = nvbench::tl::cartesian_product>>; + static_assert( + std::is_same_v, nvbench::type_list>>); }; struct test_cartesian_product