Merge pull request #209 from alliepiper/pre-commit-ci

Add pre-commit.ci configs, format.
This commit is contained in:
Allison Piper
2025-04-14 14:05:48 -04:00
committed by GitHub
109 changed files with 903 additions and 967 deletions

View File

@@ -41,16 +41,28 @@ CompactNamespaces: false
ContinuationIndentWidth: 2 ContinuationIndentWidth: 2
IncludeBlocks: Regroup IncludeBlocks: Regroup
IncludeCategories: IncludeCategories:
- Regex: '^<cub'
Priority: 1
- Regex: '^<thrust'
Priority: 2
- Regex: '^<cuda'
Priority: 3
- Regex: '^<nvbench' - Regex: '^<nvbench'
Priority: 1
- Regex: '^<cub'
Priority: 2
- Regex: '^<thrust'
Priority: 3
- Regex: '^<cuda/'
Priority: 4 Priority: 4
- Regex: '^<[a-z]*>$' - Regex: '^<cuda'
Priority: 5 Priority: 5
- Regex: '^<nvml'
Priority: 6
- Regex: '^<cupti'
Priority: 7
- Regex: '^<nvperf'
Priority: 8
- Regex: '^<nlohmann'
Priority: 9
- Regex: '^<fmt'
Priority: 10
- Regex: '^<[a-z_]*>$'
Priority: 11
IndentCaseLabels: true IndentCaseLabels: true
IndentPPDirectives: None IndentPPDirectives: None
IndentWidth: 2 IndentWidth: 2

View File

@@ -34,7 +34,7 @@ CCCL uses [Development Containers](https://containers.dev/) to provide consisten
- Alternatively, use the Command Palette to start a Dev Container. Press `Ctrl+Shift+P` to open the Command Palette. Type "Remote-Containers: Reopen in Container" and select it. - Alternatively, use the Command Palette to start a Dev Container. Press `Ctrl+Shift+P` to open the Command Palette. Type "Remote-Containers: Reopen in Container" and select it.
![Shows "Reopen in Container" in command pallete.](./img/open_in_container_manual.png) ![Shows "Reopen in Container" in command palette.](./img/open_in_container_manual.png)
4. Select an environment with the desired CTK and host compiler from the list: 4. Select an environment with the desired CTK and host compiler from the list:
@@ -136,7 +136,7 @@ For more information, see the `.devcontainer/make_devcontainers.sh --help` messa
2. Install WSL 2 by running: 2. Install WSL 2 by running:
```bash ```bash
wsl --install wsl --install
``` ```
This will most likely install the Ubuntu distribution by default. This will most likely install the Ubuntu distribution by default.
@@ -182,14 +182,14 @@ then run `sudo systemctl restart docker.service`.
10. Open the CCCL cloned repo in VS Code ( `Ctrl + Shift + P `, select `File: Open Folder...` and select the path where your CCCL clone is located). 10. Open the CCCL cloned repo in VS Code ( `Ctrl + Shift + P `, select `File: Open Folder...` and select the path where your CCCL clone is located).
11. If prompted, choose `Reopen in Container`. 11. If prompted, choose `Reopen in Container`.
- If you are not prompted just type `Ctrl + Shift + P` and `Dev Containers: Open Folder in Container ...`. - If you are not prompted just type `Ctrl + Shift + P` and `Dev Containers: Open Folder in Container ...`.
12. Verify that Dev Container was configured properly by running `nvidia-smi` in your Dev Container terminal. For a proper configuration it is important for the steps in [Install prerequisites and VS Code extensions](#prereqs) to be followed in a precise order. 12. Verify that Dev Container was configured properly by running `nvidia-smi` in your Dev Container terminal. For a proper configuration it is important for the steps in [Install prerequisites and VS Code extensions](#prereqs) to be followed in a precise order.
From that point on, the guide aligns with our [existing Dev Containers native Linux guide](https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md) with just one minor potential alteration: From that point on, the guide aligns with our [existing Dev Containers native Linux guide](https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md) with just one minor potential alteration:
13. If WSL was launched without the X-server enabled, when asked to "authenticate Git with your Github credentials", if you answer **Yes**, the browser might not open automatically, with the following error message. 13. If WSL was launched without the X-server enabled, when asked to "authenticate Git with your Github credentials", if you answer **Yes**, the browser might not open automatically, with the following error message.
> Failed opening a web browser at https://github.com/login/device > Failed opening a web browser at https://github.com/login/device
exec: "xdg-open,x-www-browser,www-browser,wslview": executable file not found in $PATH exec: "xdg-open,x-www-browser,www-browser,wslview": executable file not found in $PATH

View File

@@ -304,4 +304,3 @@ main() {
} }
main "$@" main "$@"

View File

@@ -14,4 +14,4 @@
# #
# Only add commits that are pure formatting changes (e.g. clang-format version changes, etc). # Only add commits that are pure formatting changes (e.g. clang-format version changes, etc).
8f1152d4a22287a35be2dde596e3cf86ace8054a # Increase column limit to 100 8f1152d4a22287a35be2dde596e3cf86ace8054a # Increase column limit to 100
3440855dbd405db614861885ad1577fffd882867 # Initial addition of pre-commit.ci formatting.

70
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,70 @@
# Copyright (c) 2025, NVIDIA CORPORATION.
ci:
autofix_commit_msg: |
[pre-commit.ci] auto code formatting
autofix_prs: false
autoupdate_branch: ''
autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
autoupdate_schedule: quarterly
skip: []
submodules: false
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: end-of-file-fixer
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v19.1.6
hooks:
- id: clang-format
types_or: [file]
files: |
(?x)^(
^.*\.c$|
^.*\.cpp$|
^.*\.cu$|
^.*\.cuh$|
^.*\.cxx$|
^.*\.h$|
^.*\.hpp$|
^.*\.inl$|
^.*\.mm$
)
args: ["-fallback-style=none", "-style=file", "-i"]
# TODO/REMINDER: add the Ruff vscode extension to the devcontainers
# Ruff, the Python auto-correcting linter/formatter written in Rust
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.6
hooks:
- id: ruff # linter
- id: ruff-format # formatter
# TOML lint & format
- repo: https://github.com/ComPWA/taplo-pre-commit
rev: v0.9.3
hooks:
# See https://github.com/NVIDIA/cccl/issues/3426
# - id: taplo-lint
# exclude: "^docs/"
- id: taplo-format
exclude: "^docs/"
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
hooks:
- id: codespell
additional_dependencies: [tomli]
args: ["--toml", "pyproject.toml"]
exclude: |
(?x)^(
build|
CITATION.md
)
default_language_version:
python: python3

View File

@@ -65,6 +65,7 @@ long_ext_count = 10
class Target: class Target:
"""Represents a single line read for a .ninja_log file.""" """Represents a single line read for a .ninja_log file."""
def __init__(self, start, end): def __init__(self, start, end):
"""Creates a target object by passing in the start/end times in seconds """Creates a target object by passing in the start/end times in seconds
as a float.""" as a float."""
@@ -94,9 +95,9 @@ class Target:
""" """
# Allow for modest floating-point errors # Allow for modest floating-point errors
epsilon = 0.000002 epsilon = 0.000002
if (self.weighted_duration > self.Duration() + epsilon): if self.weighted_duration > self.Duration() + epsilon:
print('%s > %s?' % (self.weighted_duration, self.Duration())) print("%s > %s?" % (self.weighted_duration, self.Duration()))
assert (self.weighted_duration <= self.Duration() + epsilon) assert self.weighted_duration <= self.Duration() + epsilon
return self.weighted_duration return self.weighted_duration
def DescribeTargets(self): def DescribeTargets(self):
@@ -104,10 +105,10 @@ class Target:
# Some build steps generate dozens of outputs - handle them sanely. # Some build steps generate dozens of outputs - handle them sanely.
# The max_length was chosen so that it can fit most of the long # The max_length was chosen so that it can fit most of the long
# single-target names, while minimizing word wrapping. # single-target names, while minimizing word wrapping.
result = ', '.join(self.targets) result = ", ".join(self.targets)
max_length = 65 max_length = 65
if len(result) > max_length: if len(result) > max_length:
result = result[:max_length] + '...' result = result[:max_length] + "..."
return result return result
@@ -121,12 +122,11 @@ def ReadTargets(log, show_all):
# targets. # targets.
if not header: if not header:
return [] return []
assert header == '# ninja log v5\n', \ assert header == "# ninja log v5\n", "unrecognized ninja log version %r" % header
'unrecognized ninja log version %r' % header
targets_dict = {} targets_dict = {}
last_end_seen = 0.0 last_end_seen = 0.0
for line in log: for line in log:
parts = line.strip().split('\t') parts = line.strip().split("\t")
if len(parts) != 5: if len(parts) != 5:
# If ninja.exe is rudely halted then the .ninja_log file may be # If ninja.exe is rudely halted then the .ninja_log file may be
# corrupt. Silently continue. # corrupt. Silently continue.
@@ -165,17 +165,17 @@ def ReadTargets(log, show_all):
def GetExtension(target, extra_patterns): def GetExtension(target, extra_patterns):
"""Return the file extension that best represents a target. """Return the file extension that best represents a target.
For targets that generate multiple outputs it is important to return a For targets that generate multiple outputs it is important to return a
consistent 'canonical' extension. Ultimately the goal is to group build steps consistent 'canonical' extension. Ultimately the goal is to group build steps
by type.""" by type."""
for output in target.targets: for output in target.targets:
if extra_patterns: if extra_patterns:
for fn_pattern in extra_patterns.split(';'): for fn_pattern in extra_patterns.split(";"):
if fnmatch.fnmatch(output, '*' + fn_pattern + '*'): if fnmatch.fnmatch(output, "*" + fn_pattern + "*"):
return fn_pattern return fn_pattern
# Not a true extension, but a good grouping. # Not a true extension, but a good grouping.
if output.endswith('type_mappings'): if output.endswith("type_mappings"):
extension = 'type_mappings' extension = "type_mappings"
break break
# Capture two extensions if present. For example: file.javac.jar should # Capture two extensions if present. For example: file.javac.jar should
@@ -185,26 +185,26 @@ def GetExtension(target, extra_patterns):
extension = ext2 + ext1 # Preserve the order in the file name. extension = ext2 + ext1 # Preserve the order in the file name.
if len(extension) == 0: if len(extension) == 0:
extension = '(no extension found)' extension = "(no extension found)"
if ext1 in ['.pdb', '.dll', '.exe']: if ext1 in [".pdb", ".dll", ".exe"]:
extension = 'PEFile (linking)' extension = "PEFile (linking)"
# Make sure that .dll and .exe are grouped together and that the # Make sure that .dll and .exe are grouped together and that the
# .dll.lib files don't cause these to be listed as libraries # .dll.lib files don't cause these to be listed as libraries
break break
if ext1 in ['.so', '.TOC']: if ext1 in [".so", ".TOC"]:
extension = '.so (linking)' extension = ".so (linking)"
# Attempt to identify linking, avoid identifying as '.TOC' # Attempt to identify linking, avoid identifying as '.TOC'
break break
# Make sure .obj files don't get categorized as mojo files # Make sure .obj files don't get categorized as mojo files
if ext1 in ['.obj', '.o']: if ext1 in [".obj", ".o"]:
break break
# Jars are the canonical output of java targets. # Jars are the canonical output of java targets.
if ext1 == '.jar': if ext1 == ".jar":
break break
# Normalize all mojo related outputs to 'mojo'. # Normalize all mojo related outputs to 'mojo'.
if output.count('.mojom') > 0: if output.count(".mojom") > 0:
extension = 'mojo' extension = "mojo"
break break
return extension return extension
@@ -229,8 +229,8 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
if target.end > latest: if target.end > latest:
latest = target.end latest = target.end
total_cpu_time += target.Duration() total_cpu_time += target.Duration()
task_start_stop_times.append((target.start, 'start', target)) task_start_stop_times.append((target.start, "start", target))
task_start_stop_times.append((target.end, 'stop', target)) task_start_stop_times.append((target.end, "stop", target))
length = latest - earliest length = latest - earliest
weighted_total = 0.0 weighted_total = 0.0
@@ -256,10 +256,10 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
if num_running > 0: if num_running > 0:
# Update the total weighted time up to this moment. # Update the total weighted time up to this moment.
last_weighted_time += (time - last_time) / float(num_running) last_weighted_time += (time - last_time) / float(num_running)
if action_name == 'start': if action_name == "start":
# Record the total weighted task time when this task starts. # Record the total weighted task time when this task starts.
running_tasks[target] = last_weighted_time running_tasks[target] = last_weighted_time
if action_name == 'stop': if action_name == "stop":
# Record the change in the total weighted task time while this task # Record the change in the total weighted task time while this task
# ran. # ran.
weighted_duration = last_weighted_time - running_tasks[target] weighted_duration = last_weighted_time - running_tasks[target]
@@ -267,24 +267,27 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
weighted_total += weighted_duration weighted_total += weighted_duration
del running_tasks[target] del running_tasks[target]
last_time = time last_time = time
assert (len(running_tasks) == 0) assert len(running_tasks) == 0
# Warn if the sum of weighted times is off by more than half a second. # Warn if the sum of weighted times is off by more than half a second.
if abs(length - weighted_total) > 500: if abs(length - weighted_total) > 500:
print('Warning: Possible corrupt ninja log, results may be ' print(
'untrustworthy. Length = %.3f, weighted total = %.3f' % "Warning: Possible corrupt ninja log, results may be "
(length, weighted_total)) "untrustworthy. Length = %.3f, weighted total = %.3f"
% (length, weighted_total)
)
# Print the slowest build steps: # Print the slowest build steps:
print(' Longest build steps:') print(" Longest build steps:")
if elapsed_time_sorting: if elapsed_time_sorting:
entries.sort(key=lambda x: x.Duration()) entries.sort(key=lambda x: x.Duration())
else: else:
entries.sort(key=lambda x: x.WeightedDuration()) entries.sort(key=lambda x: x.WeightedDuration())
for target in entries[-long_count:]: for target in entries[-long_count:]:
print(' %8.1f weighted s to build %s (%.1f s elapsed time)' % print(
(target.WeightedDuration(), target.DescribeTargets(), " %8.1f weighted s to build %s (%.1f s elapsed time)"
target.Duration())) % (target.WeightedDuration(), target.DescribeTargets(), target.Duration())
)
# Sum up the time by file extension/type of the output file # Sum up the time by file extension/type of the output file
count_by_ext = {} count_by_ext = {}
@@ -293,51 +296,56 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
# Scan through all of the targets to build up per-extension statistics. # Scan through all of the targets to build up per-extension statistics.
for target in entries: for target in entries:
extension = GetExtension(target, extra_step_types) extension = GetExtension(target, extra_step_types)
time_by_ext[extension] = time_by_ext.get(extension, time_by_ext[extension] = time_by_ext.get(extension, 0) + target.Duration()
0) + target.Duration() weighted_time_by_ext[extension] = (
weighted_time_by_ext[extension] = weighted_time_by_ext.get( weighted_time_by_ext.get(extension, 0) + target.WeightedDuration()
extension, 0) + target.WeightedDuration() )
count_by_ext[extension] = count_by_ext.get(extension, 0) + 1 count_by_ext[extension] = count_by_ext.get(extension, 0) + 1
print(' Time by build-step type:') print(" Time by build-step type:")
# Copy to a list with extension name and total time swapped, to (time, ext) # Copy to a list with extension name and total time swapped, to (time, ext)
if elapsed_time_sorting: if elapsed_time_sorting:
weighted_time_by_ext_sorted = sorted( weighted_time_by_ext_sorted = sorted((y, x) for (x, y) in time_by_ext.items())
(y, x) for (x, y) in time_by_ext.items())
else: else:
weighted_time_by_ext_sorted = sorted( weighted_time_by_ext_sorted = sorted(
(y, x) for (x, y) in weighted_time_by_ext.items()) (y, x) for (x, y) in weighted_time_by_ext.items()
)
# Print the slowest build target types: # Print the slowest build target types:
for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]: for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]:
print( print(
' %8.1f s weighted time to generate %d %s files ' " %8.1f s weighted time to generate %d %s files "
'(%1.1f s elapsed time sum)' % "(%1.1f s elapsed time sum)"
(time, count_by_ext[extension], extension, time_by_ext[extension])) % (time, count_by_ext[extension], extension, time_by_ext[extension])
)
print(' %.1f s weighted time (%.1f s elapsed time sum, %1.1fx ' print(
'parallelism)' % " %.1f s weighted time (%.1f s elapsed time sum, %1.1fx "
(length, total_cpu_time, total_cpu_time * 1.0 / length)) "parallelism)" % (length, total_cpu_time, total_cpu_time * 1.0 / length)
print(' %d build steps completed, average of %1.2f/s' % )
(len(entries), len(entries) / (length))) print(
" %d build steps completed, average of %1.2f/s"
% (len(entries), len(entries) / (length))
)
def main(): def main():
log_file = '.ninja_log' log_file = ".ninja_log"
metrics_file = 'siso_metrics.json' metrics_file = "siso_metrics.json"
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-C', dest='build_directory', help='Build directory.') parser.add_argument("-C", dest="build_directory", help="Build directory.")
parser.add_argument( parser.add_argument(
'-s', "-s",
'--step-types', "--step-types",
help='semicolon separated fnmatch patterns for build-step grouping') help="semicolon separated fnmatch patterns for build-step grouping",
)
parser.add_argument( parser.add_argument(
'-e', "-e",
'--elapsed_time_sorting', "--elapsed_time_sorting",
default=False, default=False,
action='store_true', action="store_true",
help='Sort output by elapsed time instead of weighted time') help="Sort output by elapsed time instead of weighted time",
parser.add_argument('--log-file', )
help="specific ninja log file to analyze.") parser.add_argument("--log-file", help="specific ninja log file to analyze.")
args, _extra_args = parser.parse_known_args() args, _extra_args = parser.parse_known_args()
if args.build_directory: if args.build_directory:
log_file = os.path.join(args.build_directory, log_file) log_file = os.path.join(args.build_directory, log_file)
@@ -348,34 +356,35 @@ def main():
# Offer a convenient way to add extra step types automatically, # Offer a convenient way to add extra step types automatically,
# including when this script is run by autoninja. get() returns None if # including when this script is run by autoninja. get() returns None if
# the variable isn't set. # the variable isn't set.
args.step_types = os.environ.get('chromium_step_types') args.step_types = os.environ.get("chromium_step_types")
if args.step_types: if args.step_types:
# Make room for the extra build types. # Make room for the extra build types.
global long_ext_count global long_ext_count
long_ext_count += len(args.step_types.split(';')) long_ext_count += len(args.step_types.split(";"))
if os.path.exists(metrics_file): if os.path.exists(metrics_file):
# Automatically handle summarizing siso builds. # Automatically handle summarizing siso builds.
cmd = ['siso.bat' if 'win32' in sys.platform else 'siso'] cmd = ["siso.bat" if "win32" in sys.platform else "siso"]
cmd.extend(['metrics', 'summary']) cmd.extend(["metrics", "summary"])
if args.build_directory: if args.build_directory:
cmd.extend(['-C', args.build_directory]) cmd.extend(["-C", args.build_directory])
if args.step_types: if args.step_types:
cmd.extend(['--step_types', args.step_types]) cmd.extend(["--step_types", args.step_types])
if args.elapsed_time_sorting: if args.elapsed_time_sorting:
cmd.append('--elapsed_time_sorting') cmd.append("--elapsed_time_sorting")
subprocess.run(cmd) subprocess.run(cmd)
else: else:
try: try:
with open(log_file, 'r') as log: with open(log_file, "r") as log:
entries = ReadTargets(log, False) entries = ReadTargets(log, False)
if entries: if entries:
SummarizeEntries(entries, args.step_types, SummarizeEntries(
args.elapsed_time_sorting) entries, args.step_types, args.elapsed_time_sorting
)
except IOError: except IOError:
print('Log file %r not found, no build summary created.' % log_file) print("Log file %r not found, no build summary created." % log_file)
return errno.ENOENT return errno.ENOENT
if __name__ == '__main__': if __name__ == "__main__":
sys.exit(main()) sys.exit(main())

View File

@@ -37,5 +37,5 @@ if [ $requests_diff -eq 0 ]; then
else else
hit_rate=$(awk -v hits=$hits_diff -v requests=$requests_diff 'BEGIN {printf "%.2f", hits/requests * 100}') hit_rate=$(awk -v hits=$hits_diff -v requests=$requests_diff 'BEGIN {printf "%.2f", hits/requests * 100}')
echo "sccache hit rate: $hit_rate%" >&2 echo "sccache hit rate: $hit_rate%" >&2
echo "$hit_rate" echo "$hit_rate"
fi fi

View File

@@ -1,4 +1,4 @@
# Detect the langauge standards supported by the current compilers. # Detect the language standards supported by the current compilers.
# #
# Usage: detect_supported_cxx_standards(<var_prefix> <lang> <standards>) # Usage: detect_supported_cxx_standards(<var_prefix> <lang> <standards>)
# #

View File

@@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
# Passes all args directly to execute_process while setting up the following # Passes all args directly to execute_process while setting up the following
# results variables and propogating them to the caller's scope: # results variables and propagating them to the caller's scope:
# #
# - nvbench_process_exit_code # - nvbench_process_exit_code
# - nvbench_process_stdout # - nvbench_process_stdout

View File

@@ -9,9 +9,9 @@
// a potential macro collision and halts. // a potential macro collision and halts.
// //
// Hacky way to build a string, but it works on all tested platforms. // Hacky way to build a string, but it works on all tested platforms.
#define NVBench_MACRO_CHECK(MACRO, HEADER) \ #define NVBench_MACRO_CHECK(MACRO, HEADER) \
NVBench_MACRO_CHECK_IMPL(Identifier MACRO should not be used from NVBench \ NVBench_MACRO_CHECK_IMPL( \
headers due to conflicts with HEADER macros.) Identifier MACRO should not be used from NVBench headers due to conflicts with HEADER macros.)
// Use raw platform checks instead of the NVBench_HOST_COMPILER macros since we // Use raw platform checks instead of the NVBench_HOST_COMPILER macros since we
// don't want to #include any headers other than the one being tested. // don't want to #include any headers other than the one being tested.
@@ -34,8 +34,8 @@
// library implementations unconditionally `#undef` these macros, which then // library implementations unconditionally `#undef` these macros, which then
// causes random failures later. // causes random failures later.
// Leaving these commented out as a warning: Here be dragons. // Leaving these commented out as a warning: Here be dragons.
//#define min(...) NVBench_MACRO_CHECK('min', windows.h) // #define min(...) NVBench_MACRO_CHECK('min', windows.h)
//#define max(...) NVBench_MACRO_CHECK('max', windows.h) // #define max(...) NVBench_MACRO_CHECK('max', windows.h)
// termios.h conflicts (NVIDIA/thrust#1547) // termios.h conflicts (NVIDIA/thrust#1547)
#define B0 NVBench_MACRO_CHECK("B0", termios.h) #define B0 NVBench_MACRO_CHECK("B0", termios.h)

View File

@@ -90,7 +90,7 @@
before any `--benchmark` arguments. before any `--benchmark` arguments.
* `--stopping-criterion <criterion>` * `--stopping-criterion <criterion>`
* After `--min-samples` is satisfied, use `<criterion>` to detect if enough * After `--min-samples` is satisfied, use `<criterion>` to detect if enough
samples were collected. samples were collected.
* Only applies to Cold measurements. * Only applies to Cold measurements.
* Default is stdrel (`--stopping-criterion stdrel`) * Default is stdrel (`--stopping-criterion stdrel`)

View File

@@ -24,37 +24,33 @@
template <int ItemsPerThread> template <int ItemsPerThread>
__global__ void kernel(std::size_t stride, __global__ void kernel(std::size_t stride,
std::size_t elements, std::size_t elements,
const nvbench::int32_t * __restrict__ in, const nvbench::int32_t *__restrict__ in,
nvbench::int32_t *__restrict__ out) nvbench::int32_t *__restrict__ out)
{ {
const std::size_t tid = threadIdx.x + blockIdx.x * blockDim.x; const std::size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const std::size_t step = gridDim.x * blockDim.x; const std::size_t step = gridDim.x * blockDim.x;
for (std::size_t i = stride * tid; for (std::size_t i = stride * tid; i < stride * elements; i += stride * step)
i < stride * elements;
i += stride * step)
{ {
for (int j = 0; j < ItemsPerThread; j++) for (int j = 0; j < ItemsPerThread; j++)
{ {
const auto read_id = (ItemsPerThread * i + j) % elements; const auto read_id = (ItemsPerThread * i + j) % elements;
const auto write_id = tid + j * elements; const auto write_id = tid + j * elements;
out[write_id] = in[read_id]; out[write_id] = in[read_id];
} }
} }
} }
// `throughput_bench` copies a 128 MiB buffer of int32_t, and reports throughput // `throughput_bench` copies a 128 MiB buffer of int32_t, and reports throughput
// and cache hit rates. // and cache hit rates.
// //
// Calling state.collect_*() enables particular metric collection if nvbench // Calling state.collect_*() enables particular metric collection if nvbench
// was built with CUPTI support (CMake option: -DNVBench_ENABLE_CUPTI=ON). // was built with CUPTI support (CMake option: -DNVBench_ENABLE_CUPTI=ON).
template <int ItemsPerThread> template <int ItemsPerThread>
void throughput_bench(nvbench::state &state, void throughput_bench(nvbench::state &state, nvbench::type_list<nvbench::enum_type<ItemsPerThread>>)
nvbench::type_list<nvbench::enum_type<ItemsPerThread>>)
{ {
// Allocate input data: // Allocate input data:
const std::size_t stride = static_cast<std::size_t>(state.get_int64("Stride")); const std::size_t stride = static_cast<std::size_t>(state.get_int64("Stride"));
const std::size_t elements = 128 * 1024 * 1024 / sizeof(nvbench::int32_t); const std::size_t elements = 128 * 1024 * 1024 / sizeof(nvbench::int32_t);
thrust::device_vector<nvbench::int32_t> input(elements); thrust::device_vector<nvbench::int32_t> input(elements);
thrust::device_vector<nvbench::int32_t> output(elements * ItemsPerThread); thrust::device_vector<nvbench::int32_t> output(elements * ItemsPerThread);
@@ -72,12 +68,11 @@ void throughput_bench(nvbench::state &state,
static_cast<int>((elements + threads_in_block - 1) / threads_in_block); static_cast<int>((elements + threads_in_block - 1) / threads_in_block);
state.exec([&](nvbench::launch &launch) { state.exec([&](nvbench::launch &launch) {
kernel<ItemsPerThread> kernel<ItemsPerThread><<<blocks_in_grid, threads_in_block, 0, launch.get_stream()>>>(
<<<blocks_in_grid, threads_in_block, 0, launch.get_stream()>>>( stride,
stride, elements,
elements, thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(input.data()), thrust::raw_pointer_cast(output.data()));
thrust::raw_pointer_cast(output.data()));
}); });
} }

View File

@@ -71,18 +71,16 @@ void copy_sweep_grid_shape(nvbench::state &state)
thrust::device_vector<nvbench::int32_t> in(num_values, 0); thrust::device_vector<nvbench::int32_t> in(num_values, 0);
thrust::device_vector<nvbench::int32_t> out(num_values, 0); thrust::device_vector<nvbench::int32_t> out(num_values, 0);
state.exec( state.exec([block_size,
[block_size, num_blocks,
num_blocks, num_values,
num_values, in_ptr = thrust::raw_pointer_cast(in.data()),
in_ptr = thrust::raw_pointer_cast(in.data()), out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { (void)num_values; // clang thinks this is unused...
(void) num_values; // clang thinks this is unused... nvbench::copy_kernel<<<num_blocks, block_size, 0, launch.get_stream()>>>(in_ptr,
nvbench::copy_kernel<<<num_blocks, block_size, 0, launch.get_stream()>>>( out_ptr,
in_ptr, num_values);
out_ptr, });
num_values);
});
} }
NVBENCH_BENCH(copy_sweep_grid_shape) NVBENCH_BENCH(copy_sweep_grid_shape)
// Every second power of two from 64->1024: // Every second power of two from 64->1024:
@@ -107,15 +105,12 @@ void copy_type_sweep(nvbench::state &state, nvbench::type_list<ValueType>)
thrust::device_vector<ValueType> in(num_values, 0); thrust::device_vector<ValueType> in(num_values, 0);
thrust::device_vector<ValueType> out(num_values, 0); thrust::device_vector<ValueType> out(num_values, 0);
state.exec( state.exec([num_values,
[num_values, in_ptr = thrust::raw_pointer_cast(in.data()),
in_ptr = thrust::raw_pointer_cast(in.data()), out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { (void)num_values; // clang thinks this is unused...
(void) num_values; // clang thinks this is unused... nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values);
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, });
out_ptr,
num_values);
});
} }
// Define a type_list to use for the type axis: // Define a type_list to use for the type axis:
using cts_types = nvbench::type_list<nvbench::uint8_t, using cts_types = nvbench::type_list<nvbench::uint8_t,
@@ -131,11 +126,10 @@ NVBENCH_BENCH_TYPES(copy_type_sweep, NVBENCH_TYPE_AXES(cts_types));
// Convert 64 MiB of InputTypes to OutputTypes, represented with various // Convert 64 MiB of InputTypes to OutputTypes, represented with various
// value_types. // value_types.
template <typename InputType, typename OutputType> template <typename InputType, typename OutputType>
void copy_type_conversion_sweep(nvbench::state &state, void copy_type_conversion_sweep(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
nvbench::type_list<InputType, OutputType>)
{ {
// Optional: Skip narrowing conversions. // Optional: Skip narrowing conversions.
if constexpr(sizeof(InputType) > sizeof(OutputType)) if constexpr (sizeof(InputType) > sizeof(OutputType))
{ {
state.skip("Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."); state.skip("Narrowing conversion: sizeof(InputType) > sizeof(OutputType).");
return; return;
@@ -154,15 +148,12 @@ void copy_type_conversion_sweep(nvbench::state &state,
thrust::device_vector<InputType> in(num_values, 0); thrust::device_vector<InputType> in(num_values, 0);
thrust::device_vector<OutputType> out(num_values, 0); thrust::device_vector<OutputType> out(num_values, 0);
state.exec( state.exec([num_values,
[num_values, in_ptr = thrust::raw_pointer_cast(in.data()),
in_ptr = thrust::raw_pointer_cast(in.data()), out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { (void)num_values; // clang thinks this is unused...
(void) num_values; // clang thinks this is unused... nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values);
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, });
out_ptr,
num_values);
});
} }
// Optional: Skip when InputType == OutputType. This approach avoids // Optional: Skip when InputType == OutputType. This approach avoids
// instantiating the benchmark at all. // instantiating the benchmark at all.
@@ -178,6 +169,5 @@ using ctcs_types = nvbench::type_list<nvbench::int8_t,
nvbench::float32_t, nvbench::float32_t,
nvbench::int64_t, nvbench::int64_t,
nvbench::float64_t>; nvbench::float64_t>;
NVBENCH_BENCH_TYPES(copy_type_conversion_sweep, NVBENCH_BENCH_TYPES(copy_type_conversion_sweep, NVBENCH_TYPE_AXES(ctcs_types, ctcs_types))
NVBENCH_TYPE_AXES(ctcs_types, ctcs_types))
.set_type_axes_names({"In", "Out"}); .set_type_axes_names({"In", "Out"});

View File

@@ -36,10 +36,7 @@ public:
protected: protected:
// Setup the criterion in the `do_initialize()` method: // Setup the criterion in the `do_initialize()` method:
virtual void do_initialize() override virtual void do_initialize() override { m_num_samples = 0; }
{
m_num_samples = 0;
}
// Process new measurements in the `add_measurement()` method: // Process new measurements in the `add_measurement()` method:
virtual void do_add_measurement(nvbench::float64_t /* measurement */) override virtual void do_add_measurement(nvbench::float64_t /* measurement */) override
@@ -52,7 +49,6 @@ protected:
{ {
return m_num_samples >= m_params.get_int64("max-samples"); return m_num_samples >= m_params.get_int64("max-samples");
} }
}; };
// Register the criterion with NVBench: // Register the criterion with NVBench:
@@ -71,7 +67,7 @@ void throughput_bench(nvbench::state &state)
state.add_global_memory_writes<nvbench::int32_t>(num_values); state.add_global_memory_writes<nvbench::int32_t>(num_values);
state.exec(nvbench::exec_tag::no_batch, [&input, &output, num_values](nvbench::launch &launch) { state.exec(nvbench::exec_tag::no_batch, [&input, &output, num_values](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused... (void)num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>( nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
thrust::raw_pointer_cast(input.data()), thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()), thrust::raw_pointer_cast(output.data()),

View File

@@ -17,7 +17,6 @@
*/ */
#include <nvbench/nvbench.cuh> #include <nvbench/nvbench.cuh>
#include <nvbench/test_kernels.cuh> #include <nvbench/test_kernels.cuh>
// Enum to use as parameter axis: // Enum to use as parameter axis:
@@ -68,12 +67,10 @@ void runtime_enum_sweep_string(nvbench::state &state)
// Create inputs, etc, configure runtime kernel parameters, etc. // Create inputs, etc, configure runtime kernel parameters, etc.
// Just a dummy kernel. // Just a dummy kernel.
state.exec([](nvbench::launch &launch) { state.exec(
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
});
} }
NVBENCH_BENCH(runtime_enum_sweep_string) NVBENCH_BENCH(runtime_enum_sweep_string).add_string_axis("MyEnum", {"A", "B", "C"});
.add_string_axis("MyEnum", {"A", "B", "C"});
//============================================================================== //==============================================================================
// Sweep through enum values at runtime using an int64 axis. // Sweep through enum values at runtime using an int64 axis.
@@ -97,9 +94,8 @@ void runtime_enum_sweep_int64(nvbench::state &state)
// Create inputs, etc, configure runtime kernel parameters, etc. // Create inputs, etc, configure runtime kernel parameters, etc.
// Just a dummy kernel. // Just a dummy kernel.
state.exec([](nvbench::launch &launch) { state.exec(
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
});
} }
NVBENCH_BENCH(runtime_enum_sweep_int64) NVBENCH_BENCH(runtime_enum_sweep_int64)
.add_int64_axis("MyEnum", .add_int64_axis("MyEnum",
@@ -178,12 +174,10 @@ void compile_time_enum_sweep(nvbench::state &state,
// Template parameters, static dispatch, etc. // Template parameters, static dispatch, etc.
// Just a dummy kernel. // Just a dummy kernel.
state.exec([](nvbench::launch &launch) { state.exec(
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
});
} }
using MyEnumList = using MyEnumList = nvbench::enum_type_list<MyEnum::ValueA, MyEnum::ValueB, MyEnum::ValueC>;
nvbench::enum_type_list<MyEnum::ValueA, MyEnum::ValueB, MyEnum::ValueC>;
NVBENCH_BENCH_TYPES(compile_time_enum_sweep, NVBENCH_TYPE_AXES(MyEnumList)) NVBENCH_BENCH_TYPES(compile_time_enum_sweep, NVBENCH_TYPE_AXES(MyEnumList))
.set_type_axes_names({"MyEnum"}); .set_type_axes_names({"MyEnum"});
@@ -199,16 +193,14 @@ NVBENCH_BENCH_TYPES(compile_time_enum_sweep, NVBENCH_TYPE_AXES(MyEnumList))
// * `-12` (struct std::integral_constant<int,-12>) // * `-12` (struct std::integral_constant<int,-12>)
// ``` // ```
template <nvbench::int32_t IntValue> template <nvbench::int32_t IntValue>
void compile_time_int_sweep(nvbench::state &state, void compile_time_int_sweep(nvbench::state &state, nvbench::type_list<nvbench::enum_type<IntValue>>)
nvbench::type_list<nvbench::enum_type<IntValue>>)
{ {
// Use IntValue in compile time contexts. // Use IntValue in compile time contexts.
// Template parameters, static dispatch, etc. // Template parameters, static dispatch, etc.
// Just a dummy kernel. // Just a dummy kernel.
state.exec([](nvbench::launch &launch) { state.exec(
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
});
} }
using MyInts = nvbench::enum_type_list<0, 16, 4096, -12>; using MyInts = nvbench::enum_type_list<0, 16, 4096, -12>;
NVBENCH_BENCH_TYPES(compile_time_int_sweep, NVBENCH_TYPE_AXES(MyInts)) NVBENCH_BENCH_TYPES(compile_time_int_sweep, NVBENCH_TYPE_AXES(MyInts))

View File

@@ -53,9 +53,7 @@ void sequence_bench(nvbench::state &state)
// nvbench::exec_tag::sync indicates that this will implicitly sync: // nvbench::exec_tag::sync indicates that this will implicitly sync:
state.exec(nvbench::exec_tag::sync, [&data](nvbench::launch &launch) { state.exec(nvbench::exec_tag::sync, [&data](nvbench::launch &launch) {
thrust::sequence(thrust::device.on(launch.get_stream()), thrust::sequence(thrust::device.on(launch.get_stream()), data.begin(), data.end());
data.begin(),
data.end());
}); });
} }
NVBENCH_BENCH(sequence_bench); NVBENCH_BENCH(sequence_bench);

View File

@@ -23,8 +23,8 @@
// Thrust simplifies memory management, etc: // Thrust simplifies memory management, etc:
#include <thrust/copy.h> #include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/sequence.h> #include <thrust/sequence.h>
// mod2_inplace performs an in-place mod2 over every element in `data`. `data` // mod2_inplace performs an in-place mod2 over every element in `data`. `data`
@@ -54,7 +54,7 @@ void mod2_inplace(nvbench::state &state)
state.exec(nvbench::exec_tag::timer, state.exec(nvbench::exec_tag::timer,
// Lambda now takes a `timer` argument: // Lambda now takes a `timer` argument:
[&input, &data, num_values](nvbench::launch &launch, auto &timer) { [&input, &data, num_values](nvbench::launch &launch, auto &timer) {
(void) num_values; // clang thinks this is unused... (void)num_values; // clang thinks this is unused...
// Reset working data: // Reset working data:
thrust::copy(thrust::device.on(launch.get_stream()), thrust::copy(thrust::device.on(launch.get_stream()),

View File

@@ -72,14 +72,12 @@ NVBENCH_BENCH(runtime_skip)
// Two type axes are swept, but configurations where InputType == OutputType are // Two type axes are swept, but configurations where InputType == OutputType are
// skipped. // skipped.
template <typename InputType, typename OutputType> template <typename InputType, typename OutputType>
void skip_overload(nvbench::state &state, void skip_overload(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
nvbench::type_list<InputType, OutputType>)
{ {
// This is a contrived example that focuses on the skip overloads, so this is // This is a contrived example that focuses on the skip overloads, so this is
// just a sleep kernel: // just a sleep kernel:
state.exec([](nvbench::launch &launch) { state.exec(
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
});
} }
// Overload of skip_overload that is called when InputType == OutputType. // Overload of skip_overload that is called when InputType == OutputType.
template <typename T> template <typename T>
@@ -107,9 +105,8 @@ skip_sfinae(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
{ {
// This is a contrived example that focuses on the skip overloads, so this is // This is a contrived example that focuses on the skip overloads, so this is
// just a sleep kernel: // just a sleep kernel:
state.exec([](nvbench::launch &launch) { state.exec(
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); [](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
});
} }
// Enable this overload if InputType is larger than OutputType // Enable this overload if InputType is larger than OutputType
template <typename InputType, typename OutputType> template <typename InputType, typename OutputType>
@@ -119,10 +116,8 @@ skip_sfinae(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
state.skip("sizeof(InputType) > sizeof(OutputType)."); state.skip("sizeof(InputType) > sizeof(OutputType).");
} }
// The same type_list is used for both inputs/outputs. // The same type_list is used for both inputs/outputs.
using sn_types = nvbench::type_list<nvbench::int8_t, using sn_types =
nvbench::int16_t, nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;
nvbench::int32_t,
nvbench::int64_t>;
// Setup benchmark: // Setup benchmark:
NVBENCH_BENCH_TYPES(skip_sfinae, NVBENCH_TYPE_AXES(sn_types, sn_types)) NVBENCH_BENCH_TYPES(skip_sfinae, NVBENCH_TYPE_AXES(sn_types, sn_types))
.set_type_axes_names({"In", "Out"}); .set_type_axes_names({"In", "Out"});

View File

@@ -52,7 +52,7 @@ void stream_bench(nvbench::state &state)
state.set_cuda_stream(nvbench::make_cuda_stream_view(default_stream)); state.set_cuda_stream(nvbench::make_cuda_stream_view(default_stream));
state.exec([&input, &output, num_values](nvbench::launch &) { state.exec([&input, &output, num_values](nvbench::launch &) {
(void) num_values; // clang thinks this is unused... (void)num_values; // clang thinks this is unused...
copy(thrust::raw_pointer_cast(input.data()), copy(thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()), thrust::raw_pointer_cast(output.data()),
num_values); num_values);

View File

@@ -26,8 +26,8 @@
void summary_example(nvbench::state &state) void summary_example(nvbench::state &state)
{ {
// Fetch parameters and compute duration in seconds: // Fetch parameters and compute duration in seconds:
const auto ms = static_cast<nvbench::float64_t>(state.get_int64("ms")); const auto ms = static_cast<nvbench::float64_t>(state.get_int64("ms"));
const auto us = static_cast<nvbench::float64_t>(state.get_int64("us")); const auto us = static_cast<nvbench::float64_t>(state.get_int64("us"));
const auto duration = ms * 1e-3 + us * 1e-6; const auto duration = ms * 1e-3 + us * 1e-6;
// Add a new column to the summary table with the derived duration used by the benchmark. // Add a new column to the summary table with the derived duration used by the benchmark.

View File

@@ -51,7 +51,7 @@ void throughput_bench(nvbench::state &state)
state.add_global_memory_writes<nvbench::int32_t>(num_values); state.add_global_memory_writes<nvbench::int32_t>(num_values);
state.exec([&input, &output, num_values](nvbench::launch &launch) { state.exec([&input, &output, num_values](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused... (void)num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>( nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
thrust::raw_pointer_cast(input.data()), thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()), thrust::raw_pointer_cast(output.data()),

View File

@@ -1,20 +1,20 @@
/* /*
* Copyright 2021 NVIDIA Corporation * Copyright 2021 NVIDIA Corporation
* *
* Licensed under the Apache License, Version 2.0 with the LLVM exception * Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with * (the "License"); you may not use this file except in compliance with
* the License. * the License.
* *
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://llvm.org/foundation/relicensing/LICENSE.txt * http://llvm.org/foundation/relicensing/LICENSE.txt
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/main.cuh> #include <nvbench/main.cuh>
@@ -24,7 +24,7 @@ int main(int argc, char const *const *argv)
try try
{ {
// If no args, substitute a new argv that prints the version // If no args, substitute a new argv that prints the version
std::vector<const char*> alt_argv; std::vector<const char *> alt_argv;
if (argc == 1) if (argc == 1)
{ {
alt_argv.push_back("--version"); alt_argv.push_back("--version");
@@ -36,7 +36,7 @@ try
NVBENCH_CUDA_CALL(cudaDeviceReset()); NVBENCH_CUDA_CALL(cudaDeviceReset());
return 0; return 0;
} }
catch (std::exception & e) catch (std::exception &e)
{ {
std::cerr << "\nNVBench encountered an error:\n\n" << e.what() << "\n"; std::cerr << "\nNVBench encountered an error:\n\n" << e.what() << "\n";
return 1; return 1;

View File

@@ -19,13 +19,13 @@
#include <nvbench/axes_metadata.cuh> #include <nvbench/axes_metadata.cuh>
#include <nvbench/detail/throw.cuh> #include <nvbench/detail/throw.cuh>
#include <fmt/format.h>
#include <fmt/ranges.h>
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <stdexcept> #include <stdexcept>
#include <fmt/format.h>
#include <fmt/ranges.h>
namespace nvbench namespace nvbench
{ {

View File

@@ -16,7 +16,7 @@
* limitations under the License. * limitations under the License.
*/ */
#include "axis_base.cuh" #include <nvbench/axis_base.cuh>
namespace nvbench namespace nvbench
{ {

View File

@@ -18,9 +18,8 @@
#pragma once #pragma once
#include <nvbench/benchmark_base.cuh>
#include <nvbench/axes_metadata.cuh> #include <nvbench/axes_metadata.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/runner.cuh> #include <nvbench/runner.cuh>
#include <nvbench/type_list.cuh> #include <nvbench/type_list.cuh>

View File

@@ -45,7 +45,7 @@ struct benchmark_manager
* benchmarks should be done here to avoid creating a CUDA context before we configure the CUDA * benchmarks should be done here to avoid creating a CUDA context before we configure the CUDA
* environment in `main`. * environment in `main`.
*/ */
void initialize(); void initialize();
/** /**
* Register a new benchmark. * Register a new benchmark.

View File

@@ -17,9 +17,8 @@
*/ */
#include <nvbench/benchmark_manager.cuh> #include <nvbench/benchmark_manager.cuh>
#include <nvbench/device_manager.cuh>
#include <nvbench/detail/throw.cuh> #include <nvbench/detail/throw.cuh>
#include <nvbench/device_manager.cuh>
#include <fmt/format.h> #include <fmt/format.h>
@@ -37,8 +36,8 @@ benchmark_manager &benchmark_manager::get()
void benchmark_manager::initialize() void benchmark_manager::initialize()
{ {
const auto& mgr = device_manager::get(); const auto &mgr = device_manager::get();
for (auto& bench : m_benchmarks) for (auto &bench : m_benchmarks)
{ {
if (!bench->get_is_cpu_only()) if (!bench->get_is_cpu_only())
{ {

View File

@@ -17,12 +17,10 @@
*/ */
#include <nvbench/blocking_kernel.cuh> #include <nvbench/blocking_kernel.cuh>
#include <nvbench/cuda_call.cuh> #include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_stream.cuh> #include <nvbench/cuda_stream.cuh>
#include <nvbench/types.cuh>
#include <nvbench/detail/throw.cuh> #include <nvbench/detail/throw.cuh>
#include <nvbench/types.cuh>
#include <cuda/std/chrono> #include <cuda/std/chrono>

View File

@@ -24,7 +24,6 @@
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include <memory> #include <memory>
#include <unordered_map> #include <unordered_map>
namespace nvbench namespace nvbench
@@ -40,14 +39,14 @@ public:
/** /**
* @return The singleton criterion_manager instance. * @return The singleton criterion_manager instance.
*/ */
static criterion_manager& get(); static criterion_manager &get();
/** /**
* Register a new stopping criterion. * Register a new stopping criterion.
*/ */
nvbench::stopping_criterion_base& add(std::unique_ptr<nvbench::stopping_criterion_base> criterion); nvbench::stopping_criterion_base &add(std::unique_ptr<nvbench::stopping_criterion_base> criterion);
nvbench::stopping_criterion_base& get_criterion(const std::string& name); nvbench::stopping_criterion_base &get_criterion(const std::string &name);
const nvbench::stopping_criterion_base& get_criterion(const std::string& name) const; const nvbench::stopping_criterion_base &get_criterion(const std::string &name) const;
using params_description = std::vector<std::pair<std::string, nvbench::named_values::type>>; using params_description = std::vector<std::pair<std::string, nvbench::named_values::type>>;
params_description get_params_description() const; params_description get_params_description() const;

View File

@@ -41,7 +41,7 @@ criterion_manager &criterion_manager::get()
return registry; return registry;
} }
stopping_criterion_base& criterion_manager::get_criterion(const std::string& name) stopping_criterion_base &criterion_manager::get_criterion(const std::string &name)
{ {
auto iter = m_map.find(name); auto iter = m_map.find(name);
if (iter == m_map.end()) if (iter == m_map.end())
@@ -51,7 +51,8 @@ stopping_criterion_base& criterion_manager::get_criterion(const std::string& nam
return *iter->second.get(); return *iter->second.get();
} }
const nvbench::stopping_criterion_base& criterion_manager::get_criterion(const std::string& name) const const nvbench::stopping_criterion_base &
criterion_manager::get_criterion(const std::string &name) const
{ {
auto iter = m_map.find(name); auto iter = m_map.find(name);
if (iter == m_map.end()) if (iter == m_map.end())
@@ -69,8 +70,7 @@ stopping_criterion_base &criterion_manager::add(std::unique_ptr<stopping_criteri
if (!success) if (!success)
{ {
NVBENCH_THROW(std::runtime_error, NVBENCH_THROW(std::runtime_error, "Stopping criterion \"{}\" is already registered.", name);
"Stopping criterion \"{}\" is already registered.", name);
} }
return *it->second.get(); return *it->second.get();

View File

@@ -16,14 +16,12 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/csv_printer.cuh>
#include <nvbench/axes_metadata.cuh> #include <nvbench/axes_metadata.cuh>
#include <nvbench/benchmark_base.cuh> #include <nvbench/benchmark_base.cuh>
#include <nvbench/csv_printer.cuh>
#include <nvbench/device_info.cuh> #include <nvbench/device_info.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/internal/table_builder.cuh> #include <nvbench/internal/table_builder.cuh>
#include <nvbench/summary.cuh>
#include <fmt/format.h> #include <fmt/format.h>
@@ -169,7 +167,10 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches)
std::size_t remaining = table.m_columns.size(); std::size_t remaining = table.m_columns.size();
for (const auto &col : table.m_columns) for (const auto &col : table.m_columns)
{ {
fmt::format_to(std::back_inserter(buffer), "{}{}", col.rows[i], (--remaining == 0) ? "" : ","); fmt::format_to(std::back_inserter(buffer),
"{}{}",
col.rows[i],
(--remaining == 0) ? "" : ",");
} }
fmt::format_to(std::back_inserter(buffer), "\n"); fmt::format_to(std::back_inserter(buffer), "\n");
} }

View File

@@ -19,7 +19,6 @@
#pragma once #pragma once
#include <nvbench/cuda_call.cuh> #include <nvbench/cuda_call.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include <cuda_runtime_api.h> #include <cuda_runtime_api.h>

View File

@@ -17,7 +17,6 @@
*/ */
#include <nvbench/cupti_profiler.cuh> #include <nvbench/cupti_profiler.cuh>
#include <nvbench/detail/throw.cuh> #include <nvbench/detail/throw.cuh>
#include <nvbench/device_info.cuh> #include <nvbench/device_info.cuh>
@@ -54,7 +53,9 @@ void nvpw_call(const NVPA_Status status)
{ {
if (status != NVPA_STATUS_SUCCESS) if (status != NVPA_STATUS_SUCCESS)
{ {
NVBENCH_THROW(std::runtime_error, "NVPW call returned error: {}", static_cast<std::underlying_type_t<NVPA_Status>>(status)); NVBENCH_THROW(std::runtime_error,
"NVPW call returned error: {}",
static_cast<std::underlying_type_t<NVPA_Status>>(status));
} }
} }

View File

@@ -18,9 +18,9 @@
#pragma once #pragma once
#include <nvbench/types.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/ring_buffer.cuh> #include <nvbench/detail/ring_buffer.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/types.cuh>
#include <vector> #include <vector>
@@ -38,7 +38,7 @@ class entropy_criterion final : public stopping_criterion_base
nvbench::detail::ring_buffer<nvbench::float64_t> m_entropy_tracker{299}; nvbench::detail::ring_buffer<nvbench::float64_t> m_entropy_tracker{299};
// Used to avoid re-allocating temporary memory // Used to avoid re-allocating temporary memory
std::vector<nvbench::float64_t> m_probabilities; std::vector<nvbench::float64_t> m_probabilities;
nvbench::float64_t compute_entropy(); nvbench::float64_t compute_entropy();
@@ -49,7 +49,6 @@ protected:
virtual void do_initialize() override; virtual void do_initialize() override;
virtual void do_add_measurement(nvbench::float64_t measurement) override; virtual void do_add_measurement(nvbench::float64_t measurement) override;
virtual bool do_is_finished() override; virtual bool do_is_finished() override;
}; };
} // namespace nvbench::detail } // namespace nvbench::detail

View File

@@ -21,7 +21,6 @@
#include <cmath> #include <cmath>
namespace nvbench::detail namespace nvbench::detail
{ {
@@ -40,7 +39,7 @@ void entropy_criterion::do_initialize()
m_freq_tracker.clear(); m_freq_tracker.clear();
} }
nvbench::float64_t entropy_criterion::compute_entropy() nvbench::float64_t entropy_criterion::compute_entropy()
{ {
const std::size_t n = m_freq_tracker.size(); const std::size_t n = m_freq_tracker.size();
if (n == 0) if (n == 0)
@@ -70,15 +69,15 @@ void entropy_criterion::do_add_measurement(nvbench::float64_t measurement)
m_total_cuda_time += measurement; m_total_cuda_time += measurement;
{ {
auto key = measurement; auto key = measurement;
constexpr bool bin_keys = false; constexpr bool bin_keys = false;
if (bin_keys) if (bin_keys)
{ {
const auto resolution_us = 0.5; const auto resolution_us = 0.5;
const auto resulution_s = resolution_us / 1'000'000; const auto resulution_s = resolution_us / 1000000;
const auto epsilon = resulution_s * 2; const auto epsilon = resulution_s * 2;
key = std::round(key / epsilon) * epsilon; key = std::round(key / epsilon) * epsilon;
} }
// This approach is about 3x faster than `std::{unordered_,}map` // This approach is about 3x faster than `std::{unordered_,}map`
@@ -120,7 +119,7 @@ bool entropy_criterion::do_is_finished()
const auto [slope, intercept] = statistics::compute_linear_regression(begin, end, mean); const auto [slope, intercept] = statistics::compute_linear_regression(begin, end, mean);
if (statistics::slope2deg(slope) > m_params.get_float64("max-angle")) if (statistics::slope2deg(slope) > m_params.get_float64("max-angle"))
{ {
return false; return false;
} }

View File

@@ -25,13 +25,13 @@
#include <nvbench/state.cuh> #include <nvbench/state.cuh>
#include <nvbench/summary.cuh> #include <nvbench/summary.cuh>
#include <fmt/format.h>
#include <algorithm> #include <algorithm>
#include <chrono> #include <chrono>
#include <limits> #include <limits>
#include <thread> #include <thread>
#include <fmt/format.h>
namespace nvbench::detail namespace nvbench::detail
{ {

View File

@@ -18,8 +18,6 @@
#pragma once #pragma once
#include <cuda_runtime.h>
#include <nvbench/blocking_kernel.cuh> #include <nvbench/blocking_kernel.cuh>
#include <nvbench/cpu_timer.cuh> #include <nvbench/cpu_timer.cuh>
#include <nvbench/cuda_call.cuh> #include <nvbench/cuda_call.cuh>
@@ -32,12 +30,13 @@
#include <nvbench/exec_tag.cuh> #include <nvbench/exec_tag.cuh>
#include <nvbench/launch.cuh> #include <nvbench/launch.cuh>
#include <nvbench/stopping_criterion.cuh> #include <nvbench/stopping_criterion.cuh>
#include <nvbench/types.cuh>
#include <cuda_runtime.h>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "nvbench/types.cuh"
namespace nvbench namespace nvbench
{ {

View File

@@ -19,13 +19,12 @@
#pragma once #pragma once
#include <nvbench/cpu_timer.cuh> #include <nvbench/cpu_timer.cuh>
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
#include <nvbench/detail/statistics.cuh>
#include <nvbench/exec_tag.cuh> #include <nvbench/exec_tag.cuh>
#include <nvbench/launch.cuh> #include <nvbench/launch.cuh>
#include <nvbench/stopping_criterion.cuh> #include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
#include <nvbench/detail/statistics.cuh>
#include <utility> #include <utility>
#include <vector> #include <vector>
@@ -66,7 +65,7 @@ protected:
nvbench::cpu_timer m_walltime_timer; nvbench::cpu_timer m_walltime_timer;
nvbench::criterion_params m_criterion_params; nvbench::criterion_params m_criterion_params;
nvbench::stopping_criterion_base& m_stopping_criterion; nvbench::stopping_criterion_base &m_stopping_criterion;
bool m_run_once{false}; bool m_run_once{false};

View File

@@ -24,11 +24,11 @@
#include <nvbench/state.cuh> #include <nvbench/state.cuh>
#include <nvbench/summary.cuh> #include <nvbench/summary.cuh>
#include <fmt/format.h>
#include <algorithm> #include <algorithm>
#include <limits> #include <limits>
#include <fmt/format.h>
namespace nvbench::detail namespace nvbench::detail
{ {
@@ -36,7 +36,8 @@ measure_cpu_only_base::measure_cpu_only_base(state &exec_state)
: m_state{exec_state} : m_state{exec_state}
, m_launch(m_state.get_cuda_stream()) , m_launch(m_state.get_cuda_stream())
, m_criterion_params{exec_state.get_criterion_params()} , m_criterion_params{exec_state.get_criterion_params()}
, m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(exec_state.get_stopping_criterion())} , m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(
exec_state.get_stopping_criterion())}
, m_run_once{exec_state.get_run_once()} , m_run_once{exec_state.get_run_once()}
, m_min_samples{exec_state.get_min_samples()} , m_min_samples{exec_state.get_min_samples()}
, m_skip_time{exec_state.get_skip_time()} , m_skip_time{exec_state.get_skip_time()}
@@ -72,7 +73,7 @@ void measure_cpu_only_base::run_trials_prologue() { m_walltime_timer.start(); }
void measure_cpu_only_base::record_measurements() void measure_cpu_only_base::record_measurements()
{ {
// Update and record timers and counters: // Update and record timers and counters:
const auto cur_cpu_time = m_cpu_timer.get_duration(); const auto cur_cpu_time = m_cpu_timer.get_duration();
m_min_cpu_time = std::min(m_min_cpu_time, cur_cpu_time); m_min_cpu_time = std::min(m_min_cpu_time, cur_cpu_time);
m_max_cpu_time = std::max(m_max_cpu_time, cur_cpu_time); m_max_cpu_time = std::max(m_max_cpu_time, cur_cpu_time);
@@ -188,8 +189,7 @@ void measure_cpu_only_base::generate_summaries()
auto &summ = m_state.add_summary("nv/cpu_only/bw/global/bytes_per_second"); auto &summ = m_state.add_summary("nv/cpu_only/bw/global/bytes_per_second");
summ.set_string("name", "GlobalMem BW"); summ.set_string("name", "GlobalMem BW");
summ.set_string("hint", "byte_rate"); summ.set_string("hint", "byte_rate");
summ.set_string("description", summ.set_string("description", "Number of bytes read/written per second.");
"Number of bytes read/written per second.");
summ.set_float64("value", avg_used_gmem_bw); summ.set_float64("value", avg_used_gmem_bw);
} }
} // bandwidth } // bandwidth
@@ -210,9 +210,9 @@ void measure_cpu_only_base::generate_summaries()
if (m_max_time_exceeded) if (m_max_time_exceeded)
{ {
const auto timeout = m_walltime_timer.get_duration(); const auto timeout = m_walltime_timer.get_duration();
const auto max_noise = m_criterion_params.get_float64("max-noise"); const auto max_noise = m_criterion_params.get_float64("max-noise");
const auto min_time = m_criterion_params.get_float64("min-time"); const auto min_time = m_criterion_params.get_float64("min-time");
if (cpu_noise > max_noise) if (cpu_noise > max_noise)
{ {

View File

@@ -24,13 +24,12 @@
#include <nvbench/cuda_call.cuh> #include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_timer.cuh> #include <nvbench/cuda_timer.cuh>
#include <nvbench/cupti_profiler.cuh> #include <nvbench/cupti_profiler.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/exec_tag.cuh>
#include <nvbench/launch.cuh>
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh> #include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
#include <nvbench/detail/l2flush.cuh> #include <nvbench/detail/l2flush.cuh>
#include <nvbench/detail/statistics.cuh> #include <nvbench/detail/statistics.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/exec_tag.cuh>
#include <nvbench/launch.cuh>
#include <cuda_runtime.h> #include <cuda_runtime.h>

View File

@@ -16,9 +16,8 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/detail/measure_hot.cuh>
#include <nvbench/benchmark_base.cuh> #include <nvbench/benchmark_base.cuh>
#include <nvbench/detail/measure_hot.cuh>
#include <nvbench/detail/throw.cuh> #include <nvbench/detail/throw.cuh>
#include <nvbench/device_info.cuh> #include <nvbench/device_info.cuh>
#include <nvbench/printer_base.cuh> #include <nvbench/printer_base.cuh>

View File

@@ -19,12 +19,11 @@
#pragma once #pragma once
#include <nvbench/config.cuh> #include <nvbench/config.cuh>
#include <nvbench/detail/statistics.cuh> #include <nvbench/detail/statistics.cuh>
#include <cassert>
#include <cstddef> #include <cstddef>
#include <iterator> #include <iterator>
#include <cassert>
#include <vector> #include <vector>
namespace nvbench::detail namespace nvbench::detail
@@ -76,14 +75,14 @@ public:
return temp; return temp;
} }
ring_buffer_iterator operator+(difference_type n) const ring_buffer_iterator operator+(difference_type n) const
{ {
return ring_buffer_iterator(m_index + n, m_capacity, m_ptr); return ring_buffer_iterator(m_index + n, m_capacity, m_ptr);
} }
ring_buffer_iterator operator-(difference_type n) const ring_buffer_iterator operator-(difference_type n) const
{ {
return ring_buffer_iterator(m_index - n, m_capacity, m_ptr); return ring_buffer_iterator(m_index - n, m_capacity, m_ptr);
} }
difference_type operator-(const ring_buffer_iterator &other) const difference_type operator-(const ring_buffer_iterator &other) const
@@ -121,13 +120,9 @@ private:
std::size_t m_index{0}; std::size_t m_index{0};
bool m_full{false}; bool m_full{false};
std::size_t get_front_index() const std::size_t get_front_index() const { return m_full ? m_index : 0; }
{
return m_full ? m_index : 0;
}
public: public:
/** /**
* Create a new ring buffer with the requested capacity. * Create a new ring buffer with the requested capacity.
*/ */

View File

@@ -16,15 +16,13 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/detail/state_generator.cuh>
#include <nvbench/benchmark_base.cuh> #include <nvbench/benchmark_base.cuh>
#include <nvbench/detail/state_generator.cuh>
#include <nvbench/detail/transform_reduce.cuh>
#include <nvbench/device_info.cuh> #include <nvbench/device_info.cuh>
#include <nvbench/named_values.cuh> #include <nvbench/named_values.cuh>
#include <nvbench/type_axis.cuh> #include <nvbench/type_axis.cuh>
#include <nvbench/detail/transform_reduce.cuh>
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <functional> #include <functional>
@@ -165,7 +163,7 @@ void state_generator::build_axis_configs()
config.set_string(axis_info.axis, axis.get_input_string(axis_info.index)); config.set_string(axis_info.axis, axis.get_input_string(axis_info.index));
} }
} // type_si } // type_si
} // type_axis_config generation } // type_axis_config generation
// non_type_axis_config generation // non_type_axis_config generation
{ {
@@ -201,9 +199,9 @@ void state_generator::build_axis_configs()
axes.get_string_axis(axis_info.axis).get_value(axis_info.index)); axes.get_string_axis(axis_info.axis).get_value(axis_info.index));
break; break;
} // switch (type) } // switch (type)
} // for (axis_info : current_indices) } // for (axis_info : current_indices)
} // for non_type_sg configs } // for non_type_sg configs
} // non_type_axis_config generation } // non_type_axis_config generation
} }
void state_generator::build_states() void state_generator::build_states()

View File

@@ -26,12 +26,10 @@
#include <iterator> #include <iterator>
#include <limits> #include <limits>
#include <numeric> #include <numeric>
#include <cmath>
#include <type_traits> #include <type_traits>
#ifndef M_PI #ifndef M_PI
#define M_PI 3.14159265358979323846 #define M_PI 3.14159265358979323846
#endif #endif
namespace nvbench::detail::statistics namespace nvbench::detail::statistics
@@ -154,7 +152,7 @@ nvbench::float64_t compute_r2(It first,
for (std::size_t i = 0; i < n; ++i, ++first) for (std::size_t i = 0; i < n; ++i, ++first)
{ {
const nvbench::float64_t y = *first; const nvbench::float64_t y = *first;
const nvbench::float64_t y_pred = slope * static_cast<nvbench::float64_t>(i) + intercept; const nvbench::float64_t y_pred = slope * static_cast<nvbench::float64_t>(i) + intercept;
ss_tot += (y - mean_y) * (y - mean_y); ss_tot += (y - mean_y) * (y - mean_y);
@@ -179,19 +177,10 @@ compute_r2(It first, It last, nvbench::float64_t slope, nvbench::float64_t inter
return compute_r2(first, last, compute_mean(first, last), slope, intercept); return compute_r2(first, last, compute_mean(first, last), slope, intercept);
} }
inline nvbench::float64_t rad2deg(nvbench::float64_t rad) inline nvbench::float64_t rad2deg(nvbench::float64_t rad) { return rad * 180.0 / M_PI; }
{
return rad * 180.0 / M_PI;
}
inline nvbench::float64_t slope2rad(nvbench::float64_t slope) inline nvbench::float64_t slope2rad(nvbench::float64_t slope) { return std::atan2(slope, 1.0); }
{
return std::atan2(slope, 1.0);
}
inline nvbench::float64_t slope2deg(nvbench::float64_t slope) inline nvbench::float64_t slope2deg(nvbench::float64_t slope) { return rad2deg(slope2rad(slope)); }
{
return rad2deg(slope2rad(slope));
}
} // namespace nvbench::detail::statistics } // namespace nvbench::detail::statistics

View File

@@ -18,9 +18,9 @@
#pragma once #pragma once
#include <nvbench/types.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/ring_buffer.cuh> #include <nvbench/detail/ring_buffer.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/types.cuh>
#include <vector> #include <vector>

View File

@@ -29,7 +29,7 @@ stdrel_criterion::stdrel_criterion()
void stdrel_criterion::do_initialize() void stdrel_criterion::do_initialize()
{ {
m_total_samples = 0; m_total_samples = 0;
m_total_cuda_time = 0.0; m_total_cuda_time = 0.0;
m_cuda_times.clear(); m_cuda_times.clear();
m_noise_tracker.clear(); m_noise_tracker.clear();
@@ -46,7 +46,7 @@ void stdrel_criterion::do_add_measurement(nvbench::float64_t measurement)
const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(), const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
m_cuda_times.cend(), m_cuda_times.cend(),
mean_cuda_time); mean_cuda_time);
const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
if (std::isfinite(cuda_rel_stdev)) if (std::isfinite(cuda_rel_stdev))
{ {
m_noise_tracker.push_back(cuda_rel_stdev); m_noise_tracker.push_back(cuda_rel_stdev);

View File

@@ -19,6 +19,7 @@
#pragma once #pragma once
#include <fmt/format.h> #include <fmt/format.h>
#include <stdexcept> #include <stdexcept>
#define NVBENCH_THROW(exception_type, format_str, ...) \ #define NVBENCH_THROW(exception_type, format_str, ...) \

View File

@@ -16,13 +16,13 @@
* limitations under the License. * limitations under the License.
*/ */
#include <cuda_runtime.h>
#include <nvbench/cuda_call.cuh> #include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_stream.cuh> #include <nvbench/cuda_stream.cuh>
#include <nvbench/detail/timestamps_kernel.cuh> #include <nvbench/detail/timestamps_kernel.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include <cuda_runtime.h>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
@@ -71,12 +71,11 @@ void timestamps_kernel::record(const nvbench::cuda_stream &stream)
int num_sms = 0; int num_sms = 0;
NVBENCH_CUDA_CALL(cudaGetDevice(&device_id)); NVBENCH_CUDA_CALL(cudaGetDevice(&device_id));
NVBENCH_CUDA_CALL( NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device_id));
cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device_id));
get_timestamps_kernel<<<static_cast<unsigned int>(num_sms), 1, 0, stream.get_stream()>>>( get_timestamps_kernel<<<static_cast<unsigned int>(num_sms), 1, 0, stream.get_stream()>>>(
m_device_timestamps, m_device_timestamps,
m_device_timestamps + 1); m_device_timestamps + 1);
} }
} // namespace nvbench } // namespace nvbench::detail

View File

@@ -82,7 +82,7 @@ struct cartesian_product<nvbench::type_list<nvbench::type_list<T, Tail...>, TL,
using tail_prod = typename detail::cartesian_product<nvbench::type_list<TL, TLTail...>>::type; using tail_prod = typename detail::cartesian_product<nvbench::type_list<TL, TLTail...>>::type;
using cur = typename detail::prepend_each<T, tail_prod>::type; using cur = typename detail::prepend_each<T, tail_prod>::type;
using next = typename detail::cartesian_product< using next = typename detail::cartesian_product<
nvbench::type_list<nvbench::type_list<Tail...>, TL, TLTail...>>::type; nvbench::type_list<nvbench::type_list<Tail...>, TL, TLTail...>>::type;
using type = decltype(detail::concat(cur{}, next{})); using type = decltype(detail::concat(cur{}, next{}));
}; };

View File

@@ -16,11 +16,10 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/device_info.cuh>
#include <nvbench/config.cuh> #include <nvbench/config.cuh>
#include <nvbench/cuda_call.cuh> #include <nvbench/cuda_call.cuh>
#include <nvbench/detail/device_scope.cuh> #include <nvbench/detail/device_scope.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/internal/nvml.cuh> #include <nvbench/internal/nvml.cuh>
#include <cuda_runtime_api.h> #include <cuda_runtime_api.h>

View File

@@ -18,17 +18,16 @@
#pragma once #pragma once
#include <cuda_runtime_api.h>
#include <nvbench/config.cuh> #include <nvbench/config.cuh>
#include <nvbench/cuda_call.cuh> #include <nvbench/cuda_call.cuh>
#include <nvbench/detail/device_scope.cuh> #include <nvbench/detail/device_scope.cuh>
#include <cuda_runtime_api.h>
#include <cstdint> // CHAR_BIT #include <cstdint> // CHAR_BIT
#include <stdexcept> #include <stdexcept>
#include <utility>
#include <string_view> #include <string_view>
#include <utility>
// forward declare this for internal storage // forward declare this for internal storage
struct nvmlDevice_st; struct nvmlDevice_st;

View File

@@ -16,13 +16,12 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/device_manager.cuh>
#include <cuda_runtime_api.h>
#include <nvbench/cuda_call.cuh> #include <nvbench/cuda_call.cuh>
#include <nvbench/detail/device_scope.cuh> #include <nvbench/detail/device_scope.cuh>
#include <nvbench/detail/throw.cuh> #include <nvbench/detail/throw.cuh>
#include <nvbench/device_manager.cuh>
#include <cuda_runtime_api.h>
namespace nvbench namespace nvbench
{ {
@@ -45,13 +44,13 @@ device_manager::device_manager()
} }
} }
const nvbench::device_info &device_manager::get_device(int id) const nvbench::device_info &device_manager::get_device(int id)
{ {
if (id < 0) if (id < 0)
{ {
NVBENCH_THROW(std::runtime_error, "Negative index: {}.", id); NVBENCH_THROW(std::runtime_error, "Negative index: {}.", id);
} }
return m_devices.at(static_cast<std::size_t>(id)); return m_devices.at(static_cast<std::size_t>(id));
} }
} // namespace nvbench } // namespace nvbench

View File

@@ -101,10 +101,10 @@ using no_gpu_t = tag<nvbench::detail::exec_flag::no_gpu>;
using no_batch_t = tag<nvbench::detail::exec_flag::no_batch>; using no_batch_t = tag<nvbench::detail::exec_flag::no_batch>;
using modifier_mask_t = tag<nvbench::detail::exec_flag::modifier_mask>; using modifier_mask_t = tag<nvbench::detail::exec_flag::modifier_mask>;
using hot_t = tag<nvbench::detail::exec_flag::hot>; using hot_t = tag<nvbench::detail::exec_flag::hot>;
using cold_t = tag<nvbench::detail::exec_flag::cold>; using cold_t = tag<nvbench::detail::exec_flag::cold>;
using cpu_only_t = tag<nvbench::detail::exec_flag::cpu_only>; using cpu_only_t = tag<nvbench::detail::exec_flag::cpu_only>;
using measure_mask_t = tag<nvbench::detail::exec_flag::measure_mask>; using measure_mask_t = tag<nvbench::detail::exec_flag::measure_mask>;
constexpr inline none_t none; constexpr inline none_t none;
constexpr inline timer_t timer; constexpr inline timer_t timer;

View File

@@ -19,7 +19,6 @@
#pragma once #pragma once
#include <nvbench/axis_base.cuh> #include <nvbench/axis_base.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include <vector> #include <vector>
@@ -40,7 +39,10 @@ struct float64_axis final : public axis_base
[[nodiscard]] nvbench::float64_t get_value(std::size_t i) const { return m_values[i]; } [[nodiscard]] nvbench::float64_t get_value(std::size_t i) const { return m_values[i]; }
private: private:
std::unique_ptr<axis_base> do_clone() const final { return std::make_unique<float64_axis>(*this); } std::unique_ptr<axis_base> do_clone() const final
{
return std::make_unique<float64_axis>(*this);
}
std::size_t do_get_size() const final { return m_values.size(); } std::size_t do_get_size() const final { return m_values.size(); }
std::string do_get_input_string(std::size_t i) const final; std::string do_get_input_string(std::size_t i) const final;
std::string do_get_description(std::size_t i) const final; std::string do_get_description(std::size_t i) const final;

View File

@@ -19,7 +19,6 @@
#pragma once #pragma once
#include <nvbench/axis_base.cuh> #include <nvbench/axis_base.cuh>
#include <nvbench/flags.cuh> #include <nvbench/flags.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>

View File

@@ -16,9 +16,8 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/int64_axis.cuh>
#include <nvbench/detail/throw.cuh> #include <nvbench/detail/throw.cuh>
#include <nvbench/int64_axis.cuh>
#include <fmt/format.h> #include <fmt/format.h>

View File

@@ -19,7 +19,6 @@
#pragma once #pragma once
#include <nvbench/detail/transform_reduce.cuh> #include <nvbench/detail/transform_reduce.cuh>
#include <nvbench/internal/table_builder.cuh> #include <nvbench/internal/table_builder.cuh>
#include <fmt/color.h> #include <fmt/color.h>

View File

@@ -21,12 +21,12 @@
#include <nvbench/config.cuh> #include <nvbench/config.cuh>
#include <nvbench/detail/throw.cuh> #include <nvbench/detail/throw.cuh>
#include <fmt/format.h>
#ifdef NVBENCH_HAS_NVML #ifdef NVBENCH_HAS_NVML
#include <nvml.h> #include <nvml.h>
#endif // NVBENCH_HAS_NVML #endif // NVBENCH_HAS_NVML
#include <fmt/format.h>
#include <stdexcept> #include <stdexcept>
namespace nvbench::nvml namespace nvbench::nvml
@@ -38,6 +38,7 @@ struct NVMLLifetimeManager
{ {
NVMLLifetimeManager(); NVMLLifetimeManager();
~NVMLLifetimeManager(); ~NVMLLifetimeManager();
private: private:
bool m_inited{false}; bool m_inited{false};
}; };

View File

@@ -16,24 +16,22 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/json_printer.cuh>
#include <nvbench/axes_metadata.cuh> #include <nvbench/axes_metadata.cuh>
#include <nvbench/benchmark_base.cuh> #include <nvbench/benchmark_base.cuh>
#include <nvbench/config.cuh> #include <nvbench/config.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/device_info.cuh> #include <nvbench/device_info.cuh>
#include <nvbench/device_manager.cuh> #include <nvbench/device_manager.cuh>
#include <nvbench/git_revision.cuh> #include <nvbench/git_revision.cuh>
#include <nvbench/json_printer.cuh>
#include <nvbench/state.cuh> #include <nvbench/state.cuh>
#include <nvbench/summary.cuh> #include <nvbench/summary.cuh>
#include <nvbench/version.cuh> #include <nvbench/version.cuh>
#include <nvbench/detail/throw.cuh> #include <nlohmann/json.hpp>
#include <fmt/format.h> #include <fmt/format.h>
#include <nlohmann/json.hpp>
#include <cstdint> #include <cstdint>
#include <fstream> #include <fstream>
#include <iterator> #include <iterator>
@@ -105,7 +103,7 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values)
default: default:
NVBENCH_THROW(std::runtime_error, "{}", "Unrecognized value type."); NVBENCH_THROW(std::runtime_error, "{}", "Unrecognized value type.");
} // end switch (value type) } // end switch (value type)
} // end foreach value name } // end foreach value name
} }
} // end namespace } // end namespace
@@ -225,27 +223,26 @@ static void add_devices_section(nlohmann::ordered_json &root)
auto &devices = root["devices"]; auto &devices = root["devices"];
for (const auto &dev_info : nvbench::device_manager::get().get_devices()) for (const auto &dev_info : nvbench::device_manager::get().get_devices())
{ {
auto &device = devices.emplace_back(); auto &device = devices.emplace_back();
device["id"] = dev_info.get_id(); device["id"] = dev_info.get_id();
device["name"] = dev_info.get_name(); device["name"] = dev_info.get_name();
device["sm_version"] = dev_info.get_sm_version(); device["sm_version"] = dev_info.get_sm_version();
device["ptx_version"] = dev_info.get_ptx_version(); device["ptx_version"] = dev_info.get_ptx_version();
device["sm_default_clock_rate"] = dev_info.get_sm_default_clock_rate(); device["sm_default_clock_rate"] = dev_info.get_sm_default_clock_rate();
device["number_of_sms"] = dev_info.get_number_of_sms(); device["number_of_sms"] = dev_info.get_number_of_sms();
device["max_blocks_per_sm"] = dev_info.get_max_blocks_per_sm(); device["max_blocks_per_sm"] = dev_info.get_max_blocks_per_sm();
device["max_threads_per_sm"] = dev_info.get_max_threads_per_sm(); device["max_threads_per_sm"] = dev_info.get_max_threads_per_sm();
device["max_threads_per_block"] = dev_info.get_max_threads_per_block(); device["max_threads_per_block"] = dev_info.get_max_threads_per_block();
device["registers_per_sm"] = dev_info.get_registers_per_sm(); device["registers_per_sm"] = dev_info.get_registers_per_sm();
device["registers_per_block"] = dev_info.get_registers_per_block(); device["registers_per_block"] = dev_info.get_registers_per_block();
device["global_memory_size"] = dev_info.get_global_memory_size(); device["global_memory_size"] = dev_info.get_global_memory_size();
device["global_memory_bus_peak_clock_rate"] = device["global_memory_bus_peak_clock_rate"] = dev_info.get_global_memory_bus_peak_clock_rate();
dev_info.get_global_memory_bus_peak_clock_rate(); device["global_memory_bus_width"] = dev_info.get_global_memory_bus_width();
device["global_memory_bus_width"] = dev_info.get_global_memory_bus_width(); device["global_memory_bus_bandwidth"] = dev_info.get_global_memory_bus_bandwidth();
device["global_memory_bus_bandwidth"] = dev_info.get_global_memory_bus_bandwidth(); device["l2_cache_size"] = dev_info.get_l2_cache_size();
device["l2_cache_size"] = dev_info.get_l2_cache_size(); device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm();
device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm(); device["shared_memory_per_block"] = dev_info.get_shared_memory_per_block();
device["shared_memory_per_block"] = dev_info.get_shared_memory_per_block(); device["ecc_state"] = dev_info.get_ecc_state();
device["ecc_state"] = dev_info.get_ecc_state();
} }
} }
@@ -298,8 +295,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
false; false;
#endif #endif
} // "nvbench" } // "nvbench"
} // "version" } // "version"
} // "meta" } // "meta"
add_devices_section(root); add_devices_section(root);
@@ -362,8 +359,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
default: default:
break; break;
} // end switch (axis type) } // end switch (axis type)
} // end foreach axis value } // end foreach axis value
} // end foreach axis } // end foreach axis
auto &states = bench["states"]; auto &states = bench["states"];
for (const auto &exec_state : bench_ptr->get_states()) for (const auto &exec_state : bench_ptr->get_states())
@@ -431,8 +428,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
continue; continue;
} }
} // end foreach exec_state } // end foreach exec_state
} // end foreach benchmark } // end foreach benchmark
} // "benchmarks" } // "benchmarks"
m_ostream << root.dump(2) << "\n"; m_ostream << root.dump(2) << "\n";
} }
@@ -492,7 +489,7 @@ void json_printer::do_print_benchmark_list(const benchmark_vector &benches)
default: default:
break; break;
} // end switch (axis type) } // end switch (axis type)
} // end foreach axis value } // end foreach axis value
} }
} // end foreach bench } // end foreach bench

View File

@@ -19,7 +19,6 @@
#pragma once #pragma once
#include <nvbench/printer_base.cuh> #include <nvbench/printer_base.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include <string> #include <string>

View File

@@ -16,15 +16,13 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/markdown_printer.cuh>
#include <nvbench/benchmark_base.cuh> #include <nvbench/benchmark_base.cuh>
#include <nvbench/device_manager.cuh> #include <nvbench/device_manager.cuh>
#include <nvbench/internal/markdown_table.cuh>
#include <nvbench/markdown_printer.cuh>
#include <nvbench/state.cuh> #include <nvbench/state.cuh>
#include <nvbench/summary.cuh> #include <nvbench/summary.cuh>
#include <nvbench/internal/markdown_table.cuh>
#include <fmt/color.h> #include <fmt/color.h>
#include <fmt/format.h> #include <fmt/format.h>
@@ -72,8 +70,12 @@ void markdown_printer::do_print_device_info()
"* Max Shared Memory: {} KiB/SM, {} KiB/Block\n", "* Max Shared Memory: {} KiB/SM, {} KiB/Block\n",
device.get_shared_memory_per_sm() / 1024, device.get_shared_memory_per_sm() / 1024,
device.get_shared_memory_per_block() / 1024); device.get_shared_memory_per_block() / 1024);
fmt::format_to(std::back_inserter(buffer), "* L2 Cache Size: {} KiB\n", device.get_l2_cache_size() / 1024); fmt::format_to(std::back_inserter(buffer),
fmt::format_to(std::back_inserter(buffer), "* Maximum Active Blocks: {}/SM\n", device.get_max_blocks_per_sm()); "* L2 Cache Size: {} KiB\n",
device.get_l2_cache_size() / 1024);
fmt::format_to(std::back_inserter(buffer),
"* Maximum Active Blocks: {}/SM\n",
device.get_max_blocks_per_sm());
fmt::format_to(std::back_inserter(buffer), fmt::format_to(std::back_inserter(buffer),
"* Maximum Active Threads: {}/SM, {}/Block\n", "* Maximum Active Threads: {}/SM, {}/Block\n",
device.get_max_threads_per_sm(), device.get_max_threads_per_sm(),
@@ -82,7 +84,9 @@ void markdown_printer::do_print_device_info()
"* Available Registers: {}/SM, {}/Block\n", "* Available Registers: {}/SM, {}/Block\n",
device.get_registers_per_sm(), device.get_registers_per_sm(),
device.get_registers_per_block()); device.get_registers_per_block());
fmt::format_to(std::back_inserter(buffer), "* ECC Enabled: {}\n", device.get_ecc_state() ? "Yes" : "No"); fmt::format_to(std::back_inserter(buffer),
"* ECC Enabled: {}\n",
device.get_ecc_state() ? "Yes" : "No");
fmt::format_to(std::back_inserter(buffer), "\n"); fmt::format_to(std::back_inserter(buffer), "\n");
} }
m_ostream << fmt::to_string(buffer); m_ostream << fmt::to_string(buffer);
@@ -191,9 +195,12 @@ void markdown_printer::do_print_benchmark_list(const printer_base::benchmark_vec
{ {
desc = fmt::format(" ({})", desc); desc = fmt::format(" ({})", desc);
} }
fmt::format_to(std::back_inserter(buffer), " * `{}`{}\n", axis_ptr->get_input_string(i), desc); fmt::format_to(std::back_inserter(buffer),
" * `{}`{}\n",
axis_ptr->get_input_string(i),
desc);
} // end foreach value } // end foreach value
} // end foreach axis } // end foreach axis
fmt::format_to(std::back_inserter(buffer), "\n"); fmt::format_to(std::back_inserter(buffer), "\n");
} // end foreach bench } // end foreach bench

View File

@@ -16,10 +16,9 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/named_values.cuh>
#include <nvbench/config.cuh> #include <nvbench/config.cuh>
#include <nvbench/detail/throw.cuh> #include <nvbench/detail/throw.cuh>
#include <nvbench/named_values.cuh>
#include <fmt/format.h> #include <fmt/format.h>

View File

@@ -24,8 +24,8 @@
#include <nvbench/callable.cuh> #include <nvbench/callable.cuh>
#include <nvbench/config.cuh> #include <nvbench/config.cuh>
#include <nvbench/cpu_timer.cuh> #include <nvbench/cpu_timer.cuh>
#include <nvbench/criterion_manager.cuh>
#include <nvbench/create.cuh> #include <nvbench/create.cuh>
#include <nvbench/criterion_manager.cuh>
#include <nvbench/cuda_call.cuh> #include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_stream.cuh> #include <nvbench/cuda_stream.cuh>
#include <nvbench/cuda_timer.cuh> #include <nvbench/cuda_timer.cuh>

View File

@@ -34,6 +34,8 @@
#include <nvbench/internal/cli_help.cuh> #include <nvbench/internal/cli_help.cuh>
#include <nvbench/internal/cli_help_axis.cuh> #include <nvbench/internal/cli_help_axis.cuh>
#include <fmt/format.h>
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <cstdlib> #include <cstdlib>
@@ -44,12 +46,10 @@
#include <regex> #include <regex>
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include <string_view>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include <fmt/format.h>
#include <string_view>
namespace namespace
{ {

View File

@@ -191,9 +191,9 @@ protected:
virtual void do_process_bulk_data_float64(nvbench::state &, virtual void do_process_bulk_data_float64(nvbench::state &,
const std::string &, const std::string &,
const std::string &, const std::string &,
const std::vector<nvbench::float64_t> &){}; const std::vector<nvbench::float64_t> &) {};
virtual void do_print_benchmark_list(const benchmark_vector &) virtual void do_print_benchmark_list(const benchmark_vector &)
{ {
throw std::runtime_error{"nvbench::do_print_benchmark_list is not supported by this printer."}; throw std::runtime_error{"nvbench::do_print_benchmark_list is not supported by this printer."};
} }

View File

@@ -19,7 +19,6 @@
#pragma once #pragma once
#include <nvbench/benchmark_base.cuh> #include <nvbench/benchmark_base.cuh>
#include <nvbench/detail/state_generator.cuh> #include <nvbench/detail/state_generator.cuh>
#include <stdexcept> #include <stdexcept>

View File

@@ -16,10 +16,9 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/runner.cuh>
#include <nvbench/benchmark_base.cuh> #include <nvbench/benchmark_base.cuh>
#include <nvbench/printer_base.cuh> #include <nvbench/printer_base.cuh>
#include <nvbench/runner.cuh>
#include <nvbench/state.cuh> #include <nvbench/state.cuh>
#include <fmt/format.h> #include <fmt/format.h>

View File

@@ -20,13 +20,13 @@
#include <nvbench/state.cuh> #include <nvbench/state.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include <fmt/color.h>
#include <fmt/format.h>
#include <algorithm> #include <algorithm>
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include <fmt/color.h>
#include <fmt/format.h>
namespace nvbench namespace nvbench
{ {

View File

@@ -21,19 +21,21 @@
#include <nvbench/named_values.cuh> #include <nvbench/named_values.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include <string>
#include <initializer_list> #include <initializer_list>
#include <string>
#include <unordered_map> #include <unordered_map>
namespace nvbench namespace nvbench
{ {
namespace detail namespace detail
{ {
constexpr nvbench::float64_t compat_min_time() { return 0.5; } // 0.5 seconds constexpr nvbench::float64_t compat_min_time() { return 0.5; } // 0.5 seconds
constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relative standard deviation constexpr nvbench::float64_t compat_max_noise()
{
return 0.005;
} // 0.5% relative standard deviation
} // namespace detail } // namespace detail
@@ -43,6 +45,7 @@ constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relati
class criterion_params class criterion_params
{ {
nvbench::named_values m_named_values; nvbench::named_values m_named_values;
public: public:
criterion_params(); criterion_params();
criterion_params(std::initializer_list<std::pair<std::string, nvbench::named_values::value_type>>); criterion_params(std::initializer_list<std::pair<std::string, nvbench::named_values::value_type>>);
@@ -96,7 +99,7 @@ public:
* *
* This method is called once per benchmark run, before any measurements are provided. * This method is called once per benchmark run, before any measurements are provided.
*/ */
void initialize(const criterion_params &params) void initialize(const criterion_params &params)
{ {
m_params.set_from(params); m_params.set_from(params);
this->do_initialize(); this->do_initialize();
@@ -105,18 +108,12 @@ public:
/** /**
* Add the latest measurement to the criterion * Add the latest measurement to the criterion
*/ */
void add_measurement(nvbench::float64_t measurement) void add_measurement(nvbench::float64_t measurement) { this->do_add_measurement(measurement); }
{
this->do_add_measurement(measurement);
}
/** /**
* Check if the criterion has been met for all measurements processed by `add_measurement` * Check if the criterion has been met for all measurements processed by `add_measurement`
*/ */
bool is_finished() bool is_finished() { return this->do_is_finished(); }
{
return this->do_is_finished();
}
protected: protected:
/** /**

View File

@@ -16,10 +16,8 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/throw.cuh> #include <nvbench/detail/throw.cuh>
#include <nvbench/stopping_criterion.cuh>
namespace nvbench namespace nvbench
{ {
@@ -62,7 +60,7 @@ void criterion_params::set_from(const criterion_params &other)
void criterion_params::set_int64(std::string name, nvbench::int64_t value) void criterion_params::set_int64(std::string name, nvbench::int64_t value)
{ {
if (m_named_values.has_value(name)) if (m_named_values.has_value(name))
{ {
m_named_values.remove_value(name); m_named_values.remove_value(name);
} }
@@ -72,7 +70,7 @@ void criterion_params::set_int64(std::string name, nvbench::int64_t value)
void criterion_params::set_float64(std::string name, nvbench::float64_t value) void criterion_params::set_float64(std::string name, nvbench::float64_t value)
{ {
if (m_named_values.has_value(name)) if (m_named_values.has_value(name))
{ {
m_named_values.remove_value(name); m_named_values.remove_value(name);
} }
@@ -82,7 +80,7 @@ void criterion_params::set_float64(std::string name, nvbench::float64_t value)
void criterion_params::set_string(std::string name, std::string value) void criterion_params::set_string(std::string name, std::string value)
{ {
if (m_named_values.has_value(name)) if (m_named_values.has_value(name))
{ {
m_named_values.remove_value(name); m_named_values.remove_value(name);
} }
@@ -110,15 +108,11 @@ std::string criterion_params::get_string(const std::string &name) const
return m_named_values.get_string(name); return m_named_values.get_string(name);
} }
std::vector<std::string> criterion_params::get_names() const std::vector<std::string> criterion_params::get_names() const { return m_named_values.get_names(); }
{
return m_named_values.get_names();
}
nvbench::named_values::type criterion_params::get_type(const std::string &name) const nvbench::named_values::type criterion_params::get_type(const std::string &name) const
{ {
return m_named_values.get_type(name); return m_named_values.get_type(name);
} }
} // namespace nvbench
} // namespace nvbench::detail

View File

@@ -19,7 +19,6 @@
#pragma once #pragma once
#include <nvbench/axis_base.cuh> #include <nvbench/axis_base.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include <vector> #include <vector>

View File

@@ -19,7 +19,6 @@
#pragma once #pragma once
#include <nvbench/axis_base.cuh> #include <nvbench/axis_base.cuh>
#include <nvbench/type_list.cuh> #include <nvbench/type_list.cuh>
#include <nvbench/type_strings.cuh> #include <nvbench/type_strings.cuh>

View File

@@ -16,9 +16,8 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/type_axis.cuh>
#include <nvbench/detail/throw.cuh> #include <nvbench/detail/throw.cuh>
#include <nvbench/type_axis.cuh>
#include <fmt/format.h> #include <fmt/format.h>
#include <fmt/ranges.h> #include <fmt/ranges.h>

View File

@@ -18,7 +18,7 @@
#pragma once #pragma once
#include "detail/type_list_impl.cuh" #include <nvbench/detail/type_list_impl.cuh>
#include <tuple> #include <tuple>
#include <type_traits> #include <type_traits>

View File

@@ -27,11 +27,11 @@
#endif #endif
#ifdef NVBENCH_CXXABI_DEMANGLE #ifdef NVBENCH_CXXABI_DEMANGLE
#include <cxxabi.h>
#include <cstdlib> #include <cstdlib>
#include <memory> #include <memory>
#include <cxxabi.h>
namespace namespace
{ {
struct free_wrapper struct free_wrapper

24
pyproject.toml Normal file
View File

@@ -0,0 +1,24 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
[tool.ruff]
target-version = "py310"
fix = true
show-fixes = true
exclude = ["docs/tools"]
[tool.ruff.lint]
extend-select = ["I"]
[tool.codespell]
# To run codespell interactively and fix errors that pre-commit reports, try
# `codespell -i 3 -w -H`. This will run with interactive review (-i 3), writes
# changes to disk (-w), and includes hidden files (-H).
# Note: pre-commit passes explicit lists of files here, which this skip file
# list doesn't override - the skip list is only to allow you to run codespell
# interactively.
skip = "./.git,./build,./CITATION.md"
# ignore short words, and typename parameters like OffsetT
ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
ignore-words-list = "inout,imovable,optionN,aCount,quitted,Invokable,countr,unexpect,numer,euclidian,couldn,OffsetT,FromM"
builtin = "clear"
quiet-level = 3

View File

@@ -5,12 +5,11 @@ import math
import os import os
import sys import sys
from colorama import Fore
import tabulate import tabulate
from colorama import Fore
from nvbench_json import reader from nvbench_json import reader
# Parse version string into tuple, "x.y.z" -> (x, y, z) # Parse version string into tuple, "x.y.z" -> (x, y, z)
def version_tuple(v): def version_tuple(v):
return tuple(map(int, (v.split(".")))) return tuple(map(int, (v.split("."))))
@@ -139,15 +138,14 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
colalign.append("center") colalign.append("center")
for device_id in device_ids: for device_id in device_ids:
rows = [] rows = []
plot_data = {'cmp': {}, 'ref': {}, 'cmp_noise': {}, 'ref_noise': {}} plot_data = {"cmp": {}, "ref": {}, "cmp_noise": {}, "ref_noise": {}}
for cmp_state in cmp_states: for cmp_state in cmp_states:
cmp_state_name = cmp_state["name"] cmp_state_name = cmp_state["name"]
ref_state = next(filter(lambda st: st["name"] == cmp_state_name, ref_state = next(
ref_states), filter(lambda st: st["name"] == cmp_state_name, ref_states), None
None) )
if not ref_state: if not ref_state:
continue continue
@@ -158,9 +156,7 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
row = [] row = []
for axis_value in axis_values: for axis_value in axis_values:
axis_value_name = axis_value["name"] axis_value_name = axis_value["name"]
row.append(format_axis_value(axis_value_name, row.append(format_axis_value(axis_value_name, axis_value, axes))
axis_value,
axes))
cmp_summaries = cmp_state["summaries"] cmp_summaries = cmp_state["summaries"]
ref_summaries = ref_state["summaries"] ref_summaries = ref_state["summaries"]
@@ -171,23 +167,37 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
def lookup_summary(summaries, tag): def lookup_summary(summaries, tag):
return next(filter(lambda s: s["tag"] == tag, summaries), None) return next(filter(lambda s: s["tag"] == tag, summaries), None)
cmp_time_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/mean") cmp_time_summary = lookup_summary(
ref_time_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/mean") cmp_summaries, "nv/cold/time/gpu/mean"
cmp_noise_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/stdev/relative") )
ref_noise_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/stdev/relative") ref_time_summary = lookup_summary(
ref_summaries, "nv/cold/time/gpu/mean"
)
cmp_noise_summary = lookup_summary(
cmp_summaries, "nv/cold/time/gpu/stdev/relative"
)
ref_noise_summary = lookup_summary(
ref_summaries, "nv/cold/time/gpu/stdev/relative"
)
# TODO: Use other timings, too. Maybe multiple rows, with a # TODO: Use other timings, too. Maybe multiple rows, with a
# "Timing" column + values "CPU/GPU/Batch"? # "Timing" column + values "CPU/GPU/Batch"?
if not all([cmp_time_summary, if not all(
ref_time_summary, [
cmp_noise_summary, cmp_time_summary,
ref_noise_summary]): ref_time_summary,
cmp_noise_summary,
ref_noise_summary,
]
):
continue continue
def extract_value(summary): def extract_value(summary):
summary_data = summary["data"] summary_data = summary["data"]
value_data = next(filter(lambda v: v["name"] == "value", summary_data)) value_data = next(
assert(value_data["type"] == "float64") filter(lambda v: v["name"] == "value", summary_data)
)
assert value_data["type"] == "float64"
return value_data["value"] return value_data["value"]
cmp_time = extract_value(cmp_time_summary) cmp_time = extract_value(cmp_time_summary)
@@ -218,23 +228,27 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
if plot: if plot:
axis_name = [] axis_name = []
axis_value = "--" axis_value = "--"
for aid in range(len(axis_values)): for aid in range(len(axis_values)):
if axis_values[aid]["name"] != plot: if axis_values[aid]["name"] != plot:
axis_name.append("{} = {}".format(axis_values[aid]["name"], axis_values[aid]["value"])) axis_name.append(
"{} = {}".format(
axis_values[aid]["name"], axis_values[aid]["value"]
)
)
else: else:
axis_value = float(axis_values[aid]["value"]) axis_value = float(axis_values[aid]["value"])
axis_name = ', '.join(axis_name) axis_name = ", ".join(axis_name)
if axis_name not in plot_data['cmp']: if axis_name not in plot_data["cmp"]:
plot_data['cmp'][axis_name] = {} plot_data["cmp"][axis_name] = {}
plot_data['ref'][axis_name] = {} plot_data["ref"][axis_name] = {}
plot_data['cmp_noise'][axis_name] = {} plot_data["cmp_noise"][axis_name] = {}
plot_data['ref_noise'][axis_name] = {} plot_data["ref_noise"][axis_name] = {}
plot_data['cmp'][axis_name][axis_value] = cmp_time plot_data["cmp"][axis_name][axis_value] = cmp_time
plot_data['ref'][axis_name][axis_value] = ref_time plot_data["ref"][axis_name][axis_value] = ref_time
plot_data['cmp_noise'][axis_name][axis_value] = cmp_noise plot_data["cmp_noise"][axis_name][axis_value] = cmp_noise
plot_data['ref_noise'][axis_name][axis_value] = ref_noise plot_data["ref_noise"][axis_name][axis_value] = ref_noise
global config_count global config_count
global unknown_count global unknown_count
@@ -273,14 +287,13 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
print("## [%d] %s\n" % (device["id"], device["name"])) print("## [%d] %s\n" % (device["id"], device["name"]))
# colalign and github format require tabulate 0.8.3 # colalign and github format require tabulate 0.8.3
if tabulate_version >= (0, 8, 3): if tabulate_version >= (0, 8, 3):
print(tabulate.tabulate(rows, print(
headers=headers, tabulate.tabulate(
colalign=colalign, rows, headers=headers, colalign=colalign, tablefmt="github"
tablefmt="github")) )
)
else: else:
print(tabulate.tabulate(rows, print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown"))
headers=headers,
tablefmt="markdown"))
print("") print("")
@@ -295,18 +308,17 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
x = [float(x) for x in plot_data[key][axis].keys()] x = [float(x) for x in plot_data[key][axis].keys()]
y = list(plot_data[key][axis].values()) y = list(plot_data[key][axis].values())
noise = list(plot_data[key + '_noise'][axis].values()) noise = list(plot_data[key + "_noise"][axis].values())
top = [y[i] + y[i] * noise[i] for i in range(len(x))] top = [y[i] + y[i] * noise[i] for i in range(len(x))]
bottom = [y[i] - y[i] * noise[i] for i in range(len(x))] bottom = [y[i] - y[i] * noise[i] for i in range(len(x))]
p = plt.plot(x, y, shape, marker='o', label=label) p = plt.plot(x, y, shape, marker="o", label=label)
plt.fill_between(x, bottom, top, color=p[0].get_color(), alpha=0.1) plt.fill_between(x, bottom, top, color=p[0].get_color(), alpha=0.1)
for axis in plot_data["cmp"].keys():
for axis in plot_data['cmp'].keys(): plot_line("cmp", "-", axis)
plot_line('cmp', '-', axis) plot_line("ref", "--", axis + " ref")
plot_line('ref', '--', axis + ' ref')
plt.legend() plt.legend()
plt.show() plt.show()
@@ -314,11 +326,17 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
def main(): def main():
help_text = "%(prog)s [reference.json compare.json | reference_dir/ compare_dir/]" help_text = "%(prog)s [reference.json compare.json | reference_dir/ compare_dir/]"
parser = argparse.ArgumentParser(prog='nvbench_compare', usage=help_text) parser = argparse.ArgumentParser(prog="nvbench_compare", usage=help_text)
parser.add_argument('--threshold-diff', type=float, dest='threshold', default=0.0, parser.add_argument(
help='only show benchmarks where percentage diff is >= THRESHOLD') "--threshold-diff",
parser.add_argument('--plot-along', type=str, dest='plot', default=None, type=float,
help='plot results') dest="threshold",
default=0.0,
help="only show benchmarks where percentage diff is >= THRESHOLD",
)
parser.add_argument(
"--plot-along", type=str, dest="plot", default=None, help="plot results"
)
args, files_or_dirs = parser.parse_known_args() args, files_or_dirs = parser.parse_known_args()
print(files_or_dirs) print(files_or_dirs)
@@ -336,14 +354,17 @@ def main():
continue continue
r = os.path.join(files_or_dirs[0], f) r = os.path.join(files_or_dirs[0], f)
c = os.path.join(files_or_dirs[1], f) c = os.path.join(files_or_dirs[1], f)
if os.path.isfile(r) and os.path.isfile(c) and \ if (
os.path.getsize(r) > 0 and os.path.getsize(c) > 0: os.path.isfile(r)
and os.path.isfile(c)
and os.path.getsize(r) > 0
and os.path.getsize(c) > 0
):
to_compare.append((r, c)) to_compare.append((r, c))
else: else:
to_compare = [(files_or_dirs[0], files_or_dirs[1])] to_compare = [(files_or_dirs[0], files_or_dirs[1])]
for ref, comp in to_compare: for ref, comp in to_compare:
ref_root = reader.read_file(ref) ref_root = reader.read_file(ref)
cmp_root = reader.read_file(comp) cmp_root = reader.read_file(comp)
@@ -355,7 +376,9 @@ def main():
print("Device sections do not match.") print("Device sections do not match.")
sys.exit(1) sys.exit(1)
compare_benches(ref_root["benchmarks"], cmp_root["benchmarks"], args.threshold, args.plot) compare_benches(
ref_root["benchmarks"], cmp_root["benchmarks"], args.threshold, args.plot
)
print("# Summary\n") print("# Summary\n")
print("- Total Matches: %d" % config_count) print("- Total Matches: %d" % config_count)
@@ -365,5 +388,5 @@ def main():
return failure_count return failure_count
if __name__ == '__main__': if __name__ == "__main__":
sys.exit(main()) sys.exit(main())

View File

@@ -1,19 +1,19 @@
#!/usr/bin/env python #!/usr/bin/env python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import argparse import argparse
import os import os
import sys import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from nvbench_json import reader from nvbench_json import reader
def parse_files(): def parse_files():
help_text = "%(prog)s [nvbench.out.json | dir/] ..." help_text = "%(prog)s [nvbench.out.json | dir/] ..."
parser = argparse.ArgumentParser(prog='nvbench_histogram', usage=help_text) parser = argparse.ArgumentParser(prog="nvbench_histogram", usage=help_text)
args, files_or_dirs = parser.parse_known_args() args, files_or_dirs = parser.parse_known_args()
@@ -41,14 +41,14 @@ def parse_files():
def extract_filename(summary): def extract_filename(summary):
summary_data = summary["data"] summary_data = summary["data"]
value_data = next(filter(lambda v: v["name"] == "filename", summary_data)) value_data = next(filter(lambda v: v["name"] == "filename", summary_data))
assert(value_data["type"] == "string") assert value_data["type"] == "string"
return value_data["value"] return value_data["value"]
def extract_size(summary): def extract_size(summary):
summary_data = summary["data"] summary_data = summary["data"]
value_data = next(filter(lambda v: v["name"] == "size", summary_data)) value_data = next(filter(lambda v: v["name"] == "size", summary_data))
assert(value_data["type"] == "int64") assert value_data["type"] == "int64"
return int(value_data["value"]) return int(value_data["value"])
@@ -57,9 +57,10 @@ def parse_samples_meta(filename, state):
if not summaries: if not summaries:
return None, None return None, None
summary = next(filter(lambda s: s["tag"] == "nv/json/bin:nv/cold/sample_times", summary = next(
summaries), filter(lambda s: s["tag"] == "nv/json/bin:nv/cold/sample_times", summaries),
None) None,
)
if not summary: if not summary:
return None, None return None, None
@@ -81,7 +82,7 @@ def parse_samples(filename, state):
with open(samples_filename, "rb") as f: with open(samples_filename, "rb") as f:
samples = np.fromfile(f, "<f4") samples = np.fromfile(f, "<f4")
assert (sample_count == len(samples)) assert sample_count == len(samples)
return samples return samples
@@ -118,5 +119,5 @@ def main():
plt.show() plt.show()
if __name__ == '__main__': if __name__ == "__main__":
sys.exit(main()) sys.exit(main())

View File

@@ -1,2 +1,3 @@
from . import reader from . import reader, version
from . import version
__all__ = ["reader", "version"]

View File

@@ -1,8 +1,8 @@
file_version = (1, 0, 0) file_version = (1, 0, 0)
file_version_string = "{}.{}.{}".format(file_version[0], file_version_string = "{}.{}.{}".format(
file_version[1], file_version[0], file_version[1], file_version[2]
file_version[2]) )
def check_file_version(filename, root_node): def check_file_version(filename, root_node):
@@ -19,8 +19,14 @@ def check_file_version(filename, root_node):
# for now just warn on mismatch. # for now just warn on mismatch.
if version_node["string"] != file_version_string: if version_node["string"] != file_version_string:
print("WARNING:") print("WARNING:")
print(" {} was written using a different NVBench JSON file version." print(
.format(filename)) " {} was written using a different NVBench JSON file version.".format(
filename
)
)
print(" It may not read correctly.") print(" It may not read correctly.")
print(" (file version: {} reader version: {})" print(
.format(version_node["string"], file_version_string)) " (file version: {} reader version: {})".format(
version_node["string"], file_version_string
)
)

View File

@@ -5,9 +5,8 @@ import math
import os import os
import sys import sys
from nvbench_json import reader
import tabulate import tabulate
from nvbench_json import reader
# Parse version string into tuple, "x.y.z" -> (x, y, z) # Parse version string into tuple, "x.y.z" -> (x, y, z)
@@ -39,7 +38,8 @@ def format_walltime(seconds_in):
"{:0>2d}:".format(h) if h > 1e-9 else "", "{:0>2d}:".format(h) if h > 1e-9 else "",
"{:0>2d}:".format(m) if (h > 1e-9 or m > 1e-9) else "", "{:0>2d}:".format(m) if (h > 1e-9 or m > 1e-9) else "",
"{:0>2d}.".format(s) if (h > 1e-9 or m > 1e-9) else "{:d}.".format(s), "{:0>2d}.".format(s) if (h > 1e-9 or m > 1e-9) else "{:d}.".format(s),
"{:0>3d}".format(ms)) "{:0>3d}".format(ms),
)
def format_percentage(percentage): def format_percentage(percentage):
@@ -58,7 +58,7 @@ measure_column_names = {"cold": "Isolated", "batch": "Batch", "cupti": "CUPTI"}
def init_measures(): def init_measures():
out = {} out = {}
for name in measure_names: for name in measure_names:
out[name] = 0. out[name] = 0.0
return out return out
@@ -67,17 +67,17 @@ def get_measures(state):
times = {} times = {}
for name in measure_names: for name in measure_names:
measure_walltime_tag = "nv/{}/walltime".format(name) measure_walltime_tag = "nv/{}/walltime".format(name)
summary = next(filter(lambda s: s["tag"] == measure_walltime_tag, summary = next(
summaries), filter(lambda s: s["tag"] == measure_walltime_tag, summaries), None
None) )
if not summary: if not summary:
continue continue
walltime_data = next(filter(lambda d: d["name"] == "value", summary["data"])) walltime_data = next(filter(lambda d: d["name"] == "value", summary["data"]))
assert(walltime_data["type"] == "float64") assert walltime_data["type"] == "float64"
walltime = walltime_data["value"] walltime = walltime_data["value"]
walltime = float(walltime) walltime = float(walltime)
times[name] = walltime if walltime else 0. times[name] = walltime if walltime else 0.0
return times return times
@@ -87,7 +87,7 @@ def merge_measures(target, src):
def sum_measures(measures): def sum_measures(measures):
total_time = 0. total_time = 0.0
for time in measures.values(): for time in measures.values():
total_time += time total_time += time
return total_time return total_time
@@ -194,20 +194,21 @@ def print_overview_section(data):
# colalign and github format require tabulate 0.8.3 # colalign and github format require tabulate 0.8.3
if tabulate_version >= (0, 8, 3): if tabulate_version >= (0, 8, 3):
print(tabulate.tabulate(rows, print(
headers=headers, tabulate.tabulate(
colalign=colalign, rows, headers=headers, colalign=colalign, tablefmt="github"
tablefmt="github")) )
)
else: else:
print(tabulate.tabulate(rows, print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown"))
headers=headers,
tablefmt="markdown"))
print() print()
# append_data_row_lambda args: (row_list, name, items[name]) # append_data_row_lambda args: (row_list, name, items[name])
def print_measures_table(headers, colalign, items, total_measures, append_item_row_lambda): def print_measures_table(
headers, colalign, items, total_measures, append_item_row_lambda
):
total_time = sum_measures(total_measures) total_time = sum_measures(total_measures)
active_measures = get_active_measure_names(total_measures) active_measures = get_active_measure_names(total_measures)
num_user_columns = len(headers) num_user_columns = len(headers)
@@ -248,14 +249,13 @@ def print_measures_table(headers, colalign, items, total_measures, append_item_r
# colalign and github format require tabulate 0.8.3 # colalign and github format require tabulate 0.8.3
if tabulate_version >= (0, 8, 3): if tabulate_version >= (0, 8, 3):
print(tabulate.tabulate(rows, print(
headers=headers, tabulate.tabulate(
colalign=colalign, rows, headers=headers, colalign=colalign, tablefmt="github"
tablefmt="github")) )
)
else: else:
print(tabulate.tabulate(rows, print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown"))
headers=headers,
tablefmt="markdown"))
def print_files_section(data): def print_files_section(data):
@@ -319,7 +319,7 @@ def print_bench_section(bench_name, bench):
def main(): def main():
help_text = "%(prog)s [nvbench.out.json | dir/]..." help_text = "%(prog)s [nvbench.out.json | dir/]..."
parser = argparse.ArgumentParser(prog='nvbench_walltime', usage=help_text) parser = argparse.ArgumentParser(prog="nvbench_walltime", usage=help_text)
args, files_or_dirs = parser.parse_known_args() args, files_or_dirs = parser.parse_known_args()
@@ -353,5 +353,5 @@ def main():
print_files_section(data) print_files_section(data)
if __name__ == '__main__': if __name__ == "__main__":
sys.exit(main()) sys.exit(main())

View File

@@ -17,22 +17,19 @@
*/ */
#include <nvbench/axes_metadata.cuh> #include <nvbench/axes_metadata.cuh>
#include <nvbench/type_list.cuh> #include <nvbench/type_list.cuh>
#include <nvbench/type_strings.cuh> #include <nvbench/type_strings.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h> #include <fmt/format.h>
#include <algorithm> #include <algorithm>
#include <string_view> #include <string_view>
using int_list = nvbench::type_list<nvbench::int8_t, #include "test_asserts.cuh"
nvbench::int16_t,
nvbench::int32_t, using int_list =
nvbench::int64_t>; nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;
using float_list = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>; using float_list = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>;
@@ -110,7 +107,6 @@ void test_default_type_axes_names()
ASSERT(axes.get_type_axis(4).get_name() == "T4"); ASSERT(axes.get_type_axis(4).get_name() == "T4");
ASSERT(axes.get_type_axis(4).get_axis_index() == 4); ASSERT(axes.get_type_axis(4).get_axis_index() == 4);
} }
} }
void test_type_axes() void test_type_axes()
@@ -138,8 +134,7 @@ void test_type_axes()
fmt::format_to(std::back_inserter(buffer), fmt::format_to(std::back_inserter(buffer),
" - {}{}\n", " - {}{}\n",
input_string, input_string,
description.empty() ? "" description.empty() ? "" : fmt::format(" ({})", description));
: fmt::format(" ({})", description));
} }
} }
@@ -157,9 +152,8 @@ Axis: Other
)expected"; )expected";
const std::string test = fmt::to_string(buffer); const std::string test = fmt::to_string(buffer);
const auto diff = const auto diff = std::mismatch(ref.cbegin(), ref.cend(), test.cbegin(), test.cend());
std::mismatch(ref.cbegin(), ref.cend(), test.cbegin(), test.cend()); const auto idx = static_cast<std::size_t>(diff.second - test.cbegin());
const auto idx = static_cast<std::size_t>(diff.second - test.cbegin());
ASSERT_MSG(test == ref, ASSERT_MSG(test == ref,
"Differs at character {}.\n" "Differs at character {}.\n"
"Expected:\n\"{}\"\n\n" "Expected:\n\"{}\"\n\n"
@@ -189,9 +183,7 @@ void test_float64_axes()
void test_int64_axes() void test_int64_axes()
{ {
nvbench::axes_metadata axes; nvbench::axes_metadata axes;
axes.add_int64_axis("I64 Axis", axes.add_int64_axis("I64 Axis", {10, 11, 12, 13, 14}, nvbench::int64_axis_flags::none);
{10, 11, 12, 13, 14},
nvbench::int64_axis_flags::none);
ASSERT(axes.get_axes().size() == 1); ASSERT(axes.get_axes().size() == 1);
const auto &axis = axes.get_int64_axis("I64 Axis"); const auto &axis = axes.get_int64_axis("I64 Axis");
ASSERT(axis.get_size() == 5); ASSERT(axis.get_size() == 5);
@@ -205,9 +197,7 @@ void test_int64_axes()
void test_int64_power_of_two_axes() void test_int64_power_of_two_axes()
{ {
nvbench::axes_metadata axes; nvbench::axes_metadata axes;
axes.add_int64_axis("I64 POT Axis", axes.add_int64_axis("I64 POT Axis", {1, 2, 3, 4, 5}, nvbench::int64_axis_flags::power_of_two);
{1, 2, 3, 4, 5},
nvbench::int64_axis_flags::power_of_two);
ASSERT(axes.get_axes().size() == 1); ASSERT(axes.get_axes().size() == 1);
const auto &axis = axes.get_int64_axis("I64 POT Axis"); const auto &axis = axes.get_int64_axis("I64 POT Axis");
ASSERT(axis.get_size() == 5); ASSERT(axis.get_size() == 5);

View File

@@ -17,7 +17,6 @@
*/ */
#include <nvbench/benchmark.cuh> #include <nvbench/benchmark.cuh>
#include <nvbench/callable.cuh> #include <nvbench/callable.cuh>
#include <nvbench/named_values.cuh> #include <nvbench/named_values.cuh>
#include <nvbench/state.cuh> #include <nvbench/state.cuh>
@@ -25,8 +24,6 @@
#include <nvbench/type_strings.cuh> #include <nvbench/type_strings.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h> #include <fmt/format.h>
#include <algorithm> #include <algorithm>
@@ -34,6 +31,8 @@
#include <variant> #include <variant>
#include <vector> #include <vector>
#include "test_asserts.cuh"
template <typename T> template <typename T>
std::vector<T> sort(std::vector<T> &&vec) std::vector<T> sort(std::vector<T> &&vec)
{ {
@@ -61,34 +60,26 @@ void no_op_generator(nvbench::state &state)
NVBENCH_DEFINE_CALLABLE(no_op_generator, no_op_callable); NVBENCH_DEFINE_CALLABLE(no_op_generator, no_op_callable);
template <typename Integer, typename Float, typename Other> template <typename Integer, typename Float, typename Other>
void template_no_op_generator(nvbench::state &state, void template_no_op_generator(nvbench::state &state, nvbench::type_list<Integer, Float, Other>)
nvbench::type_list<Integer, Float, Other>)
{ {
ASSERT(nvbench::type_strings<Integer>::input_string() == ASSERT(nvbench::type_strings<Integer>::input_string() == state.get_string("Integer"));
state.get_string("Integer")); ASSERT(nvbench::type_strings<Float>::input_string() == state.get_string("Float"));
ASSERT(nvbench::type_strings<Float>::input_string() == ASSERT(nvbench::type_strings<Other>::input_string() == state.get_string("Other"));
state.get_string("Float"));
ASSERT(nvbench::type_strings<Other>::input_string() ==
state.get_string("Other"));
// Enum params using non-templated version: // Enum params using non-templated version:
no_op_generator(state); no_op_generator(state);
} }
NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, template_no_op_callable);
template_no_op_callable);
using int_list = nvbench::type_list<nvbench::int8_t, using int_list =
nvbench::int16_t, nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;
nvbench::int32_t,
nvbench::int64_t>;
using float_list = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>; using float_list = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>;
using misc_list = nvbench::type_list<bool, void>; using misc_list = nvbench::type_list<bool, void>;
using lots_of_types_bench = using lots_of_types_bench =
nvbench::benchmark<template_no_op_callable, nvbench::benchmark<template_no_op_callable, nvbench::type_list<int_list, float_list, misc_list>>;
nvbench::type_list<int_list, float_list, misc_list>>;
using no_types_bench = nvbench::benchmark<no_op_callable>; using no_types_bench = nvbench::benchmark<no_op_callable>;
@@ -110,8 +101,7 @@ void test_type_axes()
fmt::format_to(std::back_inserter(buffer), fmt::format_to(std::back_inserter(buffer),
" - {}{}\n", " - {}{}\n",
input_string, input_string,
description.empty() ? "" description.empty() ? "" : fmt::format(" ({})", description));
: fmt::format(" ({})", description));
} }
} }
@@ -300,9 +290,7 @@ void test_get_config_count()
auto const num_devices = bench.get_devices().size(); auto const num_devices = bench.get_devices().size();
ASSERT_MSG(bench.get_config_count() == 72 * num_devices, ASSERT_MSG(bench.get_config_count() == 72 * num_devices, "Got {}", bench.get_config_count());
"Got {}",
bench.get_config_count());
} }
int main() int main()

View File

@@ -18,11 +18,11 @@
#include <nvbench/cpu_timer.cuh> #include <nvbench/cpu_timer.cuh>
#include "test_asserts.cuh"
#include <chrono> #include <chrono>
#include <thread> #include <thread>
#include "test_asserts.cuh"
void test_basic() void test_basic()
{ {
using namespace std::literals::chrono_literals; using namespace std::literals::chrono_literals;

View File

@@ -16,17 +16,14 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/create.cuh>
#include <nvbench/benchmark.cuh> #include <nvbench/benchmark.cuh>
#include <nvbench/callable.cuh> #include <nvbench/callable.cuh>
#include <nvbench/create.cuh>
#include <nvbench/state.cuh> #include <nvbench/state.cuh>
#include <nvbench/type_list.cuh> #include <nvbench/type_list.cuh>
#include <nvbench/type_strings.cuh> #include <nvbench/type_strings.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h> #include <fmt/format.h>
#include <algorithm> #include <algorithm>
@@ -34,6 +31,8 @@
#include <variant> #include <variant>
#include <vector> #include <vector>
#include "test_asserts.cuh"
template <typename T> template <typename T>
std::vector<T> sort(std::vector<T> &&vec) std::vector<T> sort(std::vector<T> &&vec)
{ {
@@ -72,15 +71,11 @@ using misc_types = nvbench::type_list<bool, void>;
using type_axes = nvbench::type_list<float_types, int_types, misc_types>; using type_axes = nvbench::type_list<float_types, int_types, misc_types>;
template <typename FloatT, typename IntT, typename MiscT> template <typename FloatT, typename IntT, typename MiscT>
void template_no_op_generator(nvbench::state &state, void template_no_op_generator(nvbench::state &state, nvbench::type_list<FloatT, IntT, MiscT>)
nvbench::type_list<FloatT, IntT, MiscT>)
{ {
ASSERT(nvbench::type_strings<FloatT>::input_string() == ASSERT(nvbench::type_strings<FloatT>::input_string() == state.get_string("FloatT"));
state.get_string("FloatT")); ASSERT(nvbench::type_strings<IntT>::input_string() == state.get_string("IntT"));
ASSERT(nvbench::type_strings<IntT>::input_string() == ASSERT(nvbench::type_strings<IntT>::input_string() == state.get_string("IntT"));
state.get_string("IntT"));
ASSERT(nvbench::type_strings<IntT>::input_string() ==
state.get_string("IntT"));
// Enum params using non-templated version: // Enum params using non-templated version:
no_op_generator(state); no_op_generator(state);
@@ -116,8 +111,7 @@ std::string run_and_get_state_string(nvbench::benchmark_base &bench,
void validate_default_name() void validate_default_name()
{ {
auto bench = auto bench = nvbench::benchmark_manager::get().get_benchmark("no_op_generator").clone();
nvbench::benchmark_manager::get().get_benchmark("no_op_generator").clone();
const std::string ref = "Params:\n"; const std::string ref = "Params:\n";
@@ -127,8 +121,7 @@ void validate_default_name()
void validate_custom_name() void validate_custom_name()
{ {
auto bench = auto bench = nvbench::benchmark_manager::get().get_benchmark("Custom Name").clone();
nvbench::benchmark_manager::get().get_benchmark("Custom Name").clone();
const std::string ref = "Params:\n"; const std::string ref = "Params:\n";
@@ -138,8 +131,7 @@ void validate_custom_name()
void validate_no_types() void validate_no_types()
{ {
auto bench = auto bench = nvbench::benchmark_manager::get().get_benchmark("No Types").clone();
nvbench::benchmark_manager::get().get_benchmark("No Types").clone();
const std::string ref = R"expected(Params: Float: 11 Int: 1 String: One const std::string ref = R"expected(Params: Float: 11 Int: 1 String: One
Params: Float: 11 Int: 2 String: One Params: Float: 11 Int: 2 String: One
@@ -176,8 +168,7 @@ Params: Float: 13 Int: 3 String: Three
void validate_only_types() void validate_only_types()
{ {
auto bench = auto bench = nvbench::benchmark_manager::get().get_benchmark("Oops, All Types!").clone();
nvbench::benchmark_manager::get().get_benchmark("Oops, All Types!").clone();
const std::string ref = R"expected(Params: FloatT: F32 IntT: I32 MiscT: bool const std::string ref = R"expected(Params: FloatT: F32 IntT: I32 MiscT: bool
Params: FloatT: F32 IntT: I32 MiscT: void Params: FloatT: F32 IntT: I32 MiscT: void
@@ -195,8 +186,7 @@ Params: FloatT: F64 IntT: I64 MiscT: void
void validate_all_axes() void validate_all_axes()
{ {
auto bench = auto bench = nvbench::benchmark_manager::get().get_benchmark("All The Axes").clone();
nvbench::benchmark_manager::get().get_benchmark("All The Axes").clone();
const std::string ref = const std::string ref =
R"expected(Params: Float: 11 FloatT: F32 Int: 1 IntT: I32 MiscT: bool String: One R"expected(Params: Float: 11 FloatT: F32 Int: 1 IntT: I32 MiscT: bool String: One

View File

@@ -42,27 +42,34 @@ protected:
void test_no_duplicates_are_allowed() void test_no_duplicates_are_allowed()
{ {
nvbench::criterion_manager& manager = nvbench::criterion_manager::get(); nvbench::criterion_manager &manager = nvbench::criterion_manager::get();
bool exception_triggered = false; bool exception_triggered = false;
try { try
[[maybe_unused]] nvbench::stopping_criterion_base& _ = manager.get_criterion("custom"); {
} catch(...) { [[maybe_unused]] nvbench::stopping_criterion_base &_ = manager.get_criterion("custom");
}
catch (...)
{
exception_triggered = true; exception_triggered = true;
} }
ASSERT(exception_triggered); ASSERT(exception_triggered);
std::unique_ptr<custom_criterion> custom_ptr = std::make_unique<custom_criterion>(); std::unique_ptr<custom_criterion> custom_ptr = std::make_unique<custom_criterion>();
custom_criterion* custom_raw = custom_ptr.get(); custom_criterion *custom_raw = custom_ptr.get();
ASSERT(&manager.add(std::move(custom_ptr)) == custom_raw); ASSERT(&manager.add(std::move(custom_ptr)) == custom_raw);
nvbench::stopping_criterion_base& custom = nvbench::criterion_manager::get().get_criterion("custom"); nvbench::stopping_criterion_base &custom =
nvbench::criterion_manager::get().get_criterion("custom");
ASSERT(custom_raw == &custom); ASSERT(custom_raw == &custom);
exception_triggered = false; exception_triggered = false;
try { try
{
manager.add(std::make_unique<custom_criterion>()); manager.add(std::make_unique<custom_criterion>());
} catch(...) { }
catch (...)
{
exception_triggered = true; exception_triggered = true;
} }
ASSERT(exception_triggered); ASSERT(exception_triggered);

View File

@@ -60,4 +60,3 @@ int main()
test_compat_overwrite(); test_compat_overwrite();
test_overwrite(); test_overwrite();
} }

View File

@@ -16,19 +16,16 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/cuda_timer.cuh>
#include <nvbench/cuda_stream.cuh> #include <nvbench/cuda_stream.cuh>
#include <nvbench/cuda_timer.cuh>
#include <nvbench/test_kernels.cuh> #include <nvbench/test_kernels.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h> #include <fmt/format.h>
void test_basic(cudaStream_t time_stream, #include "test_asserts.cuh"
cudaStream_t exec_stream,
bool expected) void test_basic(cudaStream_t time_stream, cudaStream_t exec_stream, bool expected)
{ {
nvbench::cuda_timer timer; nvbench::cuda_timer timer;

View File

@@ -16,8 +16,8 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/cuda_call.cuh>
#include <nvbench/nvbench.cuh> #include <nvbench/nvbench.cuh>
#include "nvbench/cuda_call.cuh"
/****************************************************************************** /******************************************************************************
* Install custom parser. * Install custom parser.
@@ -35,7 +35,7 @@
// User code to handle a specific argument: // User code to handle a specific argument:
void handle_my_custom_arg(); void handle_my_custom_arg();
// NVBench hook for modiifying the command line arguments before parsing: // NVBench hook for modifying the command line arguments before parsing:
void custom_arg_handler(std::vector<std::string> &args) void custom_arg_handler(std::vector<std::string> &args)
{ {
// Handle and remove "--my-custom-arg" // Handle and remove "--my-custom-arg"

View File

@@ -19,8 +19,8 @@
#include <nvbench/nvbench.cuh> #include <nvbench/nvbench.cuh>
#include <algorithm> #include <algorithm>
#include <cstdlib>
#include <cstdio> #include <cstdio>
#include <cstdlib>
/****************************************************************************** /******************************************************************************
* Test having global state that is initialized and finalized via RAII. * Test having global state that is initialized and finalized via RAII.

View File

@@ -29,12 +29,10 @@
void noisy_bench(nvbench::state &state) void noisy_bench(nvbench::state &state)
{ {
// time, convert ms -> s // time, convert ms -> s
const auto mean = static_cast<nvbench::float32_t>(state.get_float64("Mean")) / const auto mean = static_cast<nvbench::float32_t>(state.get_float64("Mean")) / 1000.f;
1000.f;
// rel stdev // rel stdev
const auto noise_pct = const auto noise_pct = static_cast<nvbench::float32_t>(state.get_float64("Noise"));
static_cast<nvbench::float32_t>(state.get_float64("Noise")); const auto noise = noise_pct / 100.f;
const auto noise = noise_pct / 100.f;
// abs stdev // abs stdev
const auto stdev = noise * mean; const auto stdev = noise * mean;
@@ -53,8 +51,7 @@ void noisy_bench(nvbench::state &state)
try try
{ {
return static_cast<nvbench::float32_t>( return static_cast<nvbench::float32_t>(
state.get_summary("nv/cold/time/gpu/stdev/relative") state.get_summary("nv/cold/time/gpu/stdev/relative").get_float64("value"));
.get_float64("value"));
} }
catch (std::invalid_argument &) catch (std::invalid_argument &)
{ {

View File

@@ -20,11 +20,11 @@
#include <nvbench/stopping_criterion.cuh> #include <nvbench/stopping_criterion.cuh>
#include <nvbench/types.cuh> #include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <vector>
#include <random>
#include <numeric> #include <numeric>
#include <random>
#include <vector>
#include "test_asserts.cuh"
void test_const() void test_const()
{ {
@@ -32,7 +32,7 @@ void test_const()
nvbench::detail::entropy_criterion criterion; nvbench::detail::entropy_criterion criterion;
criterion.initialize(params); criterion.initialize(params);
for (int i = 0; i < 6; i++) for (int i = 0; i < 6; i++)
{ // nvbench wants at least 5 to compute the standard deviation { // nvbench wants at least 5 to compute the standard deviation
criterion.add_measurement(42.0); criterion.add_measurement(42.0);
} }
@@ -48,7 +48,7 @@ void produce_entropy_arch(nvbench::detail::entropy_criterion &criterion)
* 2.5, 2.4, 2.2, 2.1, 2.0, 1.9 <-+ * 2.5, 2.4, 2.2, 2.1, 2.0, 1.9 <-+
* 1.8, 1.7, 1.6, 1.6, 1.5, 1.4 | * 1.8, 1.7, 1.6, 1.6, 1.5, 1.4 |
* 1.4, 1.3, 1.3, 1.3, 1.2, 1.2 | * 1.4, 1.3, 1.3, 1.3, 1.2, 1.2 |
* 1.1, 1.1, 1.1, 1.0, 1.0, 1.0 +-- entropy only decreases after 5-th sample, * 1.1, 1.1, 1.1, 1.0, 1.0, 1.0 +-- entropy only decreases after 5-th sample,
* 1.0, 0.9, 0.9, 0.9, 0.9, 0.9 | so the slope should be negative * 1.0, 0.9, 0.9, 0.9, 0.9, 0.9 | so the slope should be negative
* 0.8, 0.8, 0.8, 0.8, 0.8, 0.8 | * 0.8, 0.8, 0.8, 0.8, 0.8, 0.8 |
* 0.7, 0.7, 0.7, 0.7, 0.7, 0.7 <-+ * 0.7, 0.7, 0.7, 0.7, 0.7, 0.7 <-+

View File

@@ -18,12 +18,12 @@
#include <nvbench/enum_type_list.cuh> #include <nvbench/enum_type_list.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h> #include <fmt/format.h>
#include <type_traits> #include <type_traits>
#include "test_asserts.cuh"
// If using gcc version 7, disable some tests to WAR a compiler bug. See NVIDIA/nvbench#39. // If using gcc version 7, disable some tests to WAR a compiler bug. See NVIDIA/nvbench#39.
#if defined(__GNUC__) && __GNUC__ == 7 #if defined(__GNUC__) && __GNUC__ == 7
#define USING_GCC_7 #define USING_GCC_7
@@ -102,8 +102,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
void test_int() void test_int()
{ {
ASSERT((std::is_same_v<nvbench::enum_type_list<>, nvbench::type_list<>>)); ASSERT((std::is_same_v<nvbench::enum_type_list<>, nvbench::type_list<>>));
ASSERT((std::is_same_v<nvbench::enum_type_list<0>, ASSERT((std::is_same_v<nvbench::enum_type_list<0>, nvbench::type_list<nvbench::enum_type<0>>>));
nvbench::type_list<nvbench::enum_type<0>>>));
ASSERT((std::is_same_v<nvbench::enum_type_list<0, 1, 2, 3, 4>, ASSERT((std::is_same_v<nvbench::enum_type_list<0, 1, 2, 3, 4>,
nvbench::type_list<nvbench::enum_type<0>, nvbench::type_list<nvbench::enum_type<0>,
nvbench::enum_type<1>, nvbench::enum_type<1>,
@@ -115,42 +114,35 @@ void test_int()
void test_scoped_enum() void test_scoped_enum()
{ {
#ifndef USING_GCC_7 #ifndef USING_GCC_7
ASSERT(( ASSERT((std::is_same_v<nvbench::enum_type_list<scoped_enum::val_1>,
std::is_same_v<nvbench::enum_type_list<scoped_enum::val_1>, nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>>>));
nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>>>));
#endif #endif
ASSERT(( ASSERT((std::is_same_v<
std::is_same_v<nvbench::enum_type_list<scoped_enum::val_1, nvbench::enum_type_list<scoped_enum::val_1, scoped_enum::val_2, scoped_enum::val_3>,
scoped_enum::val_2, nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>,
scoped_enum::val_3>, nvbench::enum_type<scoped_enum::val_2>,
nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>, nvbench::enum_type<scoped_enum::val_3>>>));
nvbench::enum_type<scoped_enum::val_2>,
nvbench::enum_type<scoped_enum::val_3>>>));
} }
void test_unscoped_enum() void test_unscoped_enum()
{ {
#ifndef USING_GCC_7 #ifndef USING_GCC_7
ASSERT( ASSERT((std::is_same_v<nvbench::enum_type_list<unscoped_val_1>,
(std::is_same_v<nvbench::enum_type_list<unscoped_val_1>, nvbench::type_list<nvbench::enum_type<unscoped_val_1>>>));
nvbench::type_list<nvbench::enum_type<unscoped_val_1>>>)); ASSERT((std::is_same_v<nvbench::enum_type_list<unscoped_val_1, unscoped_val_2, unscoped_val_3>,
ASSERT( nvbench::type_list<nvbench::enum_type<unscoped_val_1>,
(std::is_same_v< nvbench::enum_type<unscoped_val_2>,
nvbench::enum_type_list<unscoped_val_1, unscoped_val_2, unscoped_val_3>, nvbench::enum_type<unscoped_val_3>>>));
nvbench::type_list<nvbench::enum_type<unscoped_val_1>,
nvbench::enum_type<unscoped_val_2>,
nvbench::enum_type<unscoped_val_3>>>));
#endif #endif
} }
void test_scoped_enum_type_strings() void test_scoped_enum_type_strings()
{ {
using values = nvbench::enum_type_list<scoped_enum::val_1, using values =
scoped_enum::val_2, nvbench::enum_type_list<scoped_enum::val_1, scoped_enum::val_2, scoped_enum::val_3>;
scoped_enum::val_3>; using val_1 = nvbench::tl::get<0, values>;
using val_1 = nvbench::tl::get<0, values>; using val_2 = nvbench::tl::get<1, values>;
using val_2 = nvbench::tl::get<1, values>; using val_3 = nvbench::tl::get<2, values>;
using val_3 = nvbench::tl::get<2, values>;
ASSERT((nvbench::type_strings<val_1>::input_string() == "1")); ASSERT((nvbench::type_strings<val_1>::input_string() == "1"));
ASSERT((nvbench::type_strings<val_1>::description() == "scoped_enum::val_1")); ASSERT((nvbench::type_strings<val_1>::description() == "scoped_enum::val_1"));
ASSERT((nvbench::type_strings<val_2>::input_string() == "2")); ASSERT((nvbench::type_strings<val_2>::input_string() == "2"));

View File

@@ -34,8 +34,7 @@ void test_empty()
const auto clone_base = axis.clone(); const auto clone_base = axis.clone();
ASSERT(clone_base.get() != nullptr); ASSERT(clone_base.get() != nullptr);
const auto *clone = const auto *clone = dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
ASSERT(clone != nullptr); ASSERT(clone != nullptr);
ASSERT(clone->get_name() == "Empty"); ASSERT(clone->get_name() == "Empty");
@@ -62,8 +61,7 @@ void test_basic()
const auto clone_base = axis.clone(); const auto clone_base = axis.clone();
ASSERT(clone_base.get() != nullptr); ASSERT(clone_base.get() != nullptr);
const auto *clone = const auto *clone = dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
ASSERT(clone != nullptr); ASSERT(clone != nullptr);
ASSERT(clone->get_name() == "Basic"); ASSERT(clone->get_name() == "Basic");

View File

@@ -18,10 +18,10 @@
#include <nvbench/int64_axis.cuh> #include <nvbench/int64_axis.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h> #include <fmt/format.h>
#include "test_asserts.cuh"
void test_empty() void test_empty()
{ {
nvbench::int64_axis axis("Empty"); nvbench::int64_axis axis("Empty");
@@ -36,8 +36,7 @@ void test_empty()
const auto clone_base = axis.clone(); const auto clone_base = axis.clone();
ASSERT(clone_base.get() != nullptr); ASSERT(clone_base.get() != nullptr);
const auto *clone = const auto *clone = dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
ASSERT(clone != nullptr); ASSERT(clone != nullptr);
ASSERT(clone->get_name() == "Empty"); ASSERT(clone->get_name() == "Empty");
@@ -66,8 +65,7 @@ void test_basic()
const auto clone_base = axis.clone(); const auto clone_base = axis.clone();
ASSERT(clone_base.get() != nullptr); ASSERT(clone_base.get() != nullptr);
const auto *clone = const auto *clone = dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
ASSERT(clone != nullptr); ASSERT(clone != nullptr);
ASSERT(clone->get_name() == "BasicAxis"); ASSERT(clone->get_name() == "BasicAxis");
@@ -87,8 +85,7 @@ void test_basic()
void test_power_of_two() void test_power_of_two()
{ {
nvbench::int64_axis axis{"POTAxis"}; nvbench::int64_axis axis{"POTAxis"};
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two);
nvbench::int64_axis_flags::power_of_two);
const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4}; const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4};
const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16}; const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16};
@@ -102,14 +99,12 @@ void test_power_of_two()
for (size_t i = 0; i < 8; ++i) for (size_t i = 0; i < 8; ++i)
{ {
ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i])); ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i]));
ASSERT(axis.get_description(i) == ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
} }
const auto clone_base = axis.clone(); const auto clone_base = axis.clone();
ASSERT(clone_base.get() != nullptr); ASSERT(clone_base.get() != nullptr);
const auto *clone = const auto *clone = dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
ASSERT(clone != nullptr); ASSERT(clone != nullptr);
ASSERT(clone->get_name() == "POTAxis"); ASSERT(clone->get_name() == "POTAxis");
@@ -122,8 +117,7 @@ void test_power_of_two()
for (size_t i = 0; i < 8; ++i) for (size_t i = 0; i < 8; ++i)
{ {
ASSERT(clone->get_input_string(i) == fmt::to_string(ref_inputs[i])); ASSERT(clone->get_input_string(i) == fmt::to_string(ref_inputs[i]));
ASSERT(clone->get_description(i) == ASSERT(clone->get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
} }
} }
@@ -250,8 +244,7 @@ void test_update_none_to_pow2()
void test_update_pow2_to_none() void test_update_pow2_to_none()
{ {
nvbench::int64_axis axis{"TestAxis"}; nvbench::int64_axis axis{"TestAxis"};
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two);
nvbench::int64_axis_flags::power_of_two);
const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4}; const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4};
const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16}; const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16};
@@ -304,8 +297,7 @@ void test_update_pow2_to_none()
for (size_t i = 0; i < 8; ++i) for (size_t i = 0; i < 8; ++i)
{ {
ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i])); ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i]));
ASSERT(axis.get_description(i) == ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
} }
} }
@@ -313,8 +305,7 @@ void test_update_pow2_to_pow2()
{ {
nvbench::int64_axis axis{"TestAxis"}; nvbench::int64_axis axis{"TestAxis"};
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two);
nvbench::int64_axis_flags::power_of_two);
const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4}; const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4};
const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16}; const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16};
@@ -369,8 +360,7 @@ void test_update_pow2_to_pow2()
for (size_t i = 0; i < 8; ++i) for (size_t i = 0; i < 8; ++i)
{ {
ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i])); ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i]));
ASSERT(axis.get_description(i) == ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
} }
} }

View File

@@ -18,10 +18,10 @@
#include <nvbench/named_values.cuh> #include <nvbench/named_values.cuh>
#include "test_asserts.cuh"
#include <algorithm> #include <algorithm>
#include "test_asserts.cuh"
void test_empty() void test_empty()
{ {
nvbench::named_values vals; nvbench::named_values vals;

View File

@@ -16,15 +16,14 @@
* limitations under the License. * limitations under the License.
*/ */
#include <nvbench/option_parser.cuh>
#include <nvbench/create.cuh> #include <nvbench/create.cuh>
#include <nvbench/option_parser.cuh>
#include <nvbench/type_list.cuh> #include <nvbench/type_list.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h> #include <fmt/format.h>
#include "test_asserts.cuh"
//============================================================================== //==============================================================================
// Declare a couple benchmarks for testing: // Declare a couple benchmarks for testing:
void DummyBench(nvbench::state &state) { state.skip("Skipping for testing."); } void DummyBench(nvbench::state &state) { state.skip("Skipping for testing."); }
@@ -50,8 +49,7 @@ NVBENCH_BENCH_TYPES(TestBench, NVBENCH_TYPE_AXES(Ts, Us))
namespace namespace
{ {
[[nodiscard]] std::string [[nodiscard]] std::string states_to_string(const std::vector<nvbench::state> &states)
states_to_string(const std::vector<nvbench::state> &states)
{ {
fmt::memory_buffer buffer; fmt::memory_buffer buffer;
std::string table_format = "| {:^5} | {:^10} | {:^4} | {:^4} | {:^4} " std::string table_format = "| {:^5} | {:^10} | {:^4} | {:^4} | {:^4} "
@@ -88,7 +86,7 @@ states_to_string(const std::vector<nvbench::state> &states)
// Expects the parser to have a single TestBench benchmark. Runs the benchmark // Expects the parser to have a single TestBench benchmark. Runs the benchmark
// and returns the resulting states. // and returns the resulting states.
[[nodiscard]] const auto& parser_to_states(nvbench::option_parser &parser) [[nodiscard]] const auto &parser_to_states(nvbench::option_parser &parser)
{ {
const auto &benches = parser.get_benchmarks(); const auto &benches = parser.get_benchmarks();
ASSERT(benches.size() == 1); ASSERT(benches.size() == 1);
@@ -267,8 +265,7 @@ void test_int64_axis_single()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 2 : 1 ] "});
{"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 2 : 1 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -308,8 +305,7 @@ void test_int64_axis_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 , 7 ] "});
{"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 , 7 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -323,8 +319,7 @@ void test_int64_axis_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 7 : 5 ] "});
{"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 7 : 5 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -369,8 +364,7 @@ void test_int64_axis_pow2_single()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 ] "});
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -384,8 +378,7 @@ void test_int64_axis_pow2_single()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 : 7 : 1 ] "});
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 : 7 : 1 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -425,8 +418,7 @@ void test_int64_axis_pow2_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 , 7 ] "});
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 , 7 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -440,8 +432,7 @@ void test_int64_axis_pow2_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 : 7 : 5 ] "});
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 : 7 : 5 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -486,8 +477,7 @@ void test_int64_axis_none_to_pow2_single()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 ] "});
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -501,8 +491,7 @@ void test_int64_axis_none_to_pow2_single()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 : 7 : 1 ] "});
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 : 7 : 1 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -542,8 +531,7 @@ void test_int64_axis_none_to_pow2_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 , 7 ] "});
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 , 7 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -557,8 +545,7 @@ void test_int64_axis_none_to_pow2_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 : 7 : 5 ] "});
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 : 7 : 5 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -617,8 +604,7 @@ void test_int64_axis_pow2_to_none_single()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 2 : 1 ] "});
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 2 : 1 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -658,8 +644,7 @@ void test_int64_axis_pow2_to_none_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 , 7 ] "});
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 , 7 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -673,8 +658,7 @@ void test_int64_axis_pow2_to_none_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 7 : 5 ] "});
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 7 : 5 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -719,8 +703,7 @@ void test_float64_axis_single()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 ] "});
{"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -734,10 +717,7 @@ void test_float64_axis_single()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse({"--benchmark", parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 : 3.6 : 1 ] "});
"TestBench",
"--axis",
" Floats [ ] = [ 3.5 : 3.6 : 1 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -777,8 +757,7 @@ void test_float64_axis_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 , 4.1 ] "});
{"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 , 4.1 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -792,18 +771,14 @@ void test_float64_axis_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse({"--benchmark", parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 : 4.2 : 0.6 ] "});
"TestBench",
"--axis",
" Floats [ ] = [ 3.5 : 4.2 : 0.6 ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", "Floats=[3.5:4.2:0.6]"});
{"--benchmark", "TestBench", "--axis", "Floats=[3.5:4.2:0.6]"});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -827,8 +802,7 @@ void test_string_axis_single()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = fo br "});
{"--benchmark", "TestBench", "--axis", " Strings [ ] = fo br "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -842,8 +816,7 @@ void test_string_axis_single()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br ] "});
{"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -883,8 +856,7 @@ void test_string_axis_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br , baz ] "});
{"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br , baz ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -951,8 +923,7 @@ void test_type_axis_multi()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "TestBench", "--axis", " T [ ] = [ U8, void ] "});
{"--benchmark", "TestBench", "--axis", " T [ ] = [ U8, void ] "});
const auto test = parser_to_state_string(parser); const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
} }
@@ -1177,9 +1148,8 @@ void test_axis_before_benchmark()
void test_min_samples() void test_min_samples()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "DummyBench", "--min-samples", "12345"});
{"--benchmark", "DummyBench", "--min-samples", "12345"}); const auto &states = parser_to_states(parser);
const auto& states = parser_to_states(parser);
ASSERT(states.size() == 1); ASSERT(states.size() == 1);
ASSERT(states[0].get_min_samples() == 12345); ASSERT(states[0].get_min_samples() == 12345);
@@ -1188,9 +1158,8 @@ void test_min_samples()
void test_min_time() void test_min_time()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "DummyBench", "--min-time", "12345e2"});
{"--benchmark", "DummyBench", "--min-time", "12345e2"}); const auto &states = parser_to_states(parser);
const auto& states = parser_to_states(parser);
ASSERT(states.size() == 1); ASSERT(states.size() == 1);
ASSERT(std::abs(states[0].get_min_time() - 12345e2) < 1.); ASSERT(std::abs(states[0].get_min_time() - 12345e2) < 1.);
@@ -1199,9 +1168,8 @@ void test_min_time()
void test_max_noise() void test_max_noise()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "DummyBench", "--max-noise", "50.3"});
{"--benchmark", "DummyBench", "--max-noise", "50.3"}); const auto &states = parser_to_states(parser);
const auto& states = parser_to_states(parser);
ASSERT(states.size() == 1); ASSERT(states.size() == 1);
ASSERT(std::abs(states[0].get_max_noise() - 0.503) < 1.e-4); ASSERT(std::abs(states[0].get_max_noise() - 0.503) < 1.e-4);
@@ -1210,9 +1178,8 @@ void test_max_noise()
void test_skip_time() void test_skip_time()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "DummyBench", "--skip-time", "12345e2"});
{"--benchmark", "DummyBench", "--skip-time", "12345e2"}); const auto &states = parser_to_states(parser);
const auto& states = parser_to_states(parser);
ASSERT(states.size() == 1); ASSERT(states.size() == 1);
ASSERT(std::abs(states[0].get_skip_time() - 12345e2) < 1.); ASSERT(std::abs(states[0].get_skip_time() - 12345e2) < 1.);
@@ -1221,9 +1188,8 @@ void test_skip_time()
void test_timeout() void test_timeout()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark", "DummyBench", "--timeout", "12345e2"});
{"--benchmark", "DummyBench", "--timeout", "12345e2"}); const auto &states = parser_to_states(parser);
const auto& states = parser_to_states(parser);
ASSERT(states.size() == 1); ASSERT(states.size() == 1);
ASSERT(std::abs(states[0].get_timeout() - 12345e2) < 1.); ASSERT(std::abs(states[0].get_timeout() - 12345e2) < 1.);
@@ -1232,12 +1198,15 @@ void test_timeout()
void test_stopping_criterion() void test_stopping_criterion()
{ {
nvbench::option_parser parser; nvbench::option_parser parser;
parser.parse( parser.parse({"--benchmark",
{"--benchmark", "DummyBench", "DummyBench",
"--stopping-criterion", "entropy", "--stopping-criterion",
"--max-angle", "0.42", "entropy",
"--min-r2", "0.6"}); "--max-angle",
const auto& states = parser_to_states(parser); "0.42",
"--min-r2",
"0.6"});
const auto &states = parser_to_states(parser);
ASSERT(states.size() == 1); ASSERT(states.size() == 1);
ASSERT(states[0].get_stopping_criterion() == "entropy"); ASSERT(states[0].get_stopping_criterion() == "entropy");

View File

@@ -22,12 +22,9 @@
void test_basic() void test_basic()
{ {
ASSERT((nvbench::range(0, 6) == ASSERT((nvbench::range(0, 6) == std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6}));
std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6})); ASSERT((nvbench::range(0, 6, 1) == std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6}));
ASSERT((nvbench::range(0, 6, 1) == ASSERT((nvbench::range(0, 6, 2) == std::vector<nvbench::int64_t>{0, 2, 4, 6}));
std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6}));
ASSERT(
(nvbench::range(0, 6, 2) == std::vector<nvbench::int64_t>{0, 2, 4, 6}));
ASSERT((nvbench::range(0, 6, 3) == std::vector<nvbench::int64_t>{0, 3, 6})); ASSERT((nvbench::range(0, 6, 3) == std::vector<nvbench::int64_t>{0, 3, 6}));
ASSERT((nvbench::range(0, 6, 4) == std::vector<nvbench::int64_t>{0, 4})); ASSERT((nvbench::range(0, 6, 4) == std::vector<nvbench::int64_t>{0, 4}));
ASSERT((nvbench::range(0, 6, 5) == std::vector<nvbench::int64_t>{0, 5})); ASSERT((nvbench::range(0, 6, 5) == std::vector<nvbench::int64_t>{0, 5}));
@@ -37,26 +34,19 @@ void test_basic()
void test_result_type() void test_result_type()
{ {
// All ints should turn into int64 by default: // All ints should turn into int64 by default:
ASSERT((std::is_same_v<decltype(nvbench::range(0ll, 1ll)), ASSERT((std::is_same_v<decltype(nvbench::range(0ll, 1ll)), std::vector<nvbench::int64_t>>));
std::vector<nvbench::int64_t>>)); ASSERT((std::is_same_v<decltype(nvbench::range(0, 1)), std::vector<nvbench::int64_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range(0, 1)), ASSERT((std::is_same_v<decltype(nvbench::range(0u, 1u)), std::vector<nvbench::int64_t>>));
std::vector<nvbench::int64_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range(0u, 1u)),
std::vector<nvbench::int64_t>>));
// All floats should turn into float64 by default: // All floats should turn into float64 by default:
ASSERT((std::is_same_v<decltype(nvbench::range(0., 1.)), ASSERT((std::is_same_v<decltype(nvbench::range(0., 1.)), std::vector<nvbench::float64_t>>));
std::vector<nvbench::float64_t>>)); ASSERT((std::is_same_v<decltype(nvbench::range(0.f, 1.f)), std::vector<nvbench::float64_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range(0.f, 1.f)),
std::vector<nvbench::float64_t>>));
// Other types may be explicitly specified: // Other types may be explicitly specified:
ASSERT((std::is_same_v<decltype(nvbench::range<nvbench::float32_t, ASSERT((std::is_same_v<decltype(nvbench::range<nvbench::float32_t, nvbench::float32_t>(0.f, 1.f)),
nvbench::float32_t>(0.f, 1.f)),
std::vector<nvbench::float32_t>>)); std::vector<nvbench::float32_t>>));
ASSERT((std::is_same_v< ASSERT((std::is_same_v<decltype(nvbench::range<nvbench::int32_t, nvbench::int32_t>(0, 1)),
decltype(nvbench::range<nvbench::int32_t, nvbench::int32_t>(0, 1)), std::vector<nvbench::int32_t>>));
std::vector<nvbench::int32_t>>));
} }
void test_fp_tolerance() void test_fp_tolerance()
@@ -68,10 +58,8 @@ void test_fp_tolerance()
const nvbench::float32_t stride = 1e-4f; const nvbench::float32_t stride = 1e-4f;
for (std::size_t size = 1; size < 1024; ++size) for (std::size_t size = 1; size < 1024; ++size)
{ {
const nvbench::float32_t end = const nvbench::float32_t end = start + stride * static_cast<nvbench::float32_t>(size - 1);
start + stride * static_cast<nvbench::float32_t>(size - 1); ASSERT_MSG(nvbench::range(start, end, stride).size() == size, "size={}", size);
ASSERT_MSG(nvbench::range(start, end, stride).size() == size,
"size={}", size);
} }
} }

View File

@@ -2,18 +2,17 @@
#include "test_asserts.cuh" #include "test_asserts.cuh"
namespace namespace
{ {
__global__ void multiply5(const int32_t* __restrict__ a, int32_t* __restrict__ b) __global__ void multiply5(const int32_t *__restrict__ a, int32_t *__restrict__ b)
{ {
const auto id = blockIdx.x * blockDim.x + threadIdx.x; const auto id = blockIdx.x * blockDim.x + threadIdx.x;
b[id] = 5 * a[id]; b[id] = 5 * a[id];
}
} }
} // namespace
int main() int main()
{ {
multiply5<<<256, 256>>>(nullptr, nullptr); multiply5<<<256, 256>>>(nullptr, nullptr);
try try

View File

@@ -18,14 +18,13 @@
#include <nvbench/detail/ring_buffer.cuh> #include <nvbench/detail/ring_buffer.cuh>
#include "test_asserts.cuh"
#include <algorithm> #include <algorithm>
#include <vector> #include <vector>
#include "test_asserts.cuh"
template <typename T> template <typename T>
bool equal(const nvbench::detail::ring_buffer<T> &buffer, bool equal(const nvbench::detail::ring_buffer<T> &buffer, const std::vector<T> &reference)
const std::vector<T> &reference)
{ {
return std::equal(buffer.begin(), buffer.end(), reference.begin()); return std::equal(buffer.begin(), buffer.end(), reference.begin());
} }

Some files were not shown because too many files have changed in this diff Show More