mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Formatting updates.
This commit is contained in:
@@ -41,16 +41,28 @@ CompactNamespaces: false
|
||||
ContinuationIndentWidth: 2
|
||||
IncludeBlocks: Regroup
|
||||
IncludeCategories:
|
||||
- Regex: '^<cub'
|
||||
Priority: 1
|
||||
- Regex: '^<thrust'
|
||||
Priority: 2
|
||||
- Regex: '^<cuda'
|
||||
Priority: 3
|
||||
- Regex: '^<nvbench'
|
||||
Priority: 1
|
||||
- Regex: '^<cub'
|
||||
Priority: 2
|
||||
- Regex: '^<thrust'
|
||||
Priority: 3
|
||||
- Regex: '^<cuda/'
|
||||
Priority: 4
|
||||
- Regex: '^<[a-z]*>$'
|
||||
- Regex: '^<cuda'
|
||||
Priority: 5
|
||||
- Regex: '^<nvml'
|
||||
Priority: 6
|
||||
- Regex: '^<cupti'
|
||||
Priority: 7
|
||||
- Regex: '^<nvperf'
|
||||
Priority: 8
|
||||
- Regex: '^<nlohmann'
|
||||
Priority: 9
|
||||
- Regex: '^<fmt'
|
||||
Priority: 10
|
||||
- Regex: '^<[a-z_]*>$'
|
||||
Priority: 11
|
||||
IndentCaseLabels: true
|
||||
IndentPPDirectives: None
|
||||
IndentWidth: 2
|
||||
|
||||
@@ -34,7 +34,7 @@ CCCL uses [Development Containers](https://containers.dev/) to provide consisten
|
||||
|
||||
- Alternatively, use the Command Palette to start a Dev Container. Press `Ctrl+Shift+P` to open the Command Palette. Type "Remote-Containers: Reopen in Container" and select it.
|
||||
|
||||

|
||||

|
||||
|
||||
4. Select an environment with the desired CTK and host compiler from the list:
|
||||
|
||||
@@ -136,7 +136,7 @@ For more information, see the `.devcontainer/make_devcontainers.sh --help` messa
|
||||
|
||||
2. Install WSL 2 by running:
|
||||
```bash
|
||||
wsl --install
|
||||
wsl --install
|
||||
```
|
||||
This will most likely install the Ubuntu distribution by default.
|
||||
|
||||
@@ -182,14 +182,14 @@ then run `sudo systemctl restart docker.service`.
|
||||
10. Open the cloned CCCL repo in VS Code (press `Ctrl + Shift + P`, select `File: Open Folder...`, and choose the path where your CCCL clone is located).
|
||||
|
||||
11. If prompted, choose `Reopen in Container`.
|
||||
|
||||
|
||||
- If you are not prompted just type `Ctrl + Shift + P` and `Dev Containers: Open Folder in Container ...`.
|
||||
|
||||
12. Verify that Dev Container was configured properly by running `nvidia-smi` in your Dev Container terminal. For a proper configuration it is important for the steps in [Install prerequisites and VS Code extensions](#prereqs) to be followed in a precise order.
|
||||
|
||||
From that point on, the guide aligns with our [existing Dev Containers native Linux guide](https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md) with just one minor potential alteration:
|
||||
|
||||
13. If WSL was launched without the X-server enabled, when asked to "authenticate Git with your Github credentials", if you answer **Yes**, the browser might not open automatically, with the following error message.
|
||||
13. If WSL was launched without the X-server enabled, when asked to "authenticate Git with your Github credentials", if you answer **Yes**, the browser might not open automatically, with the following error message.
|
||||
|
||||
> Failed opening a web browser at https://github.com/login/device
|
||||
exec: "xdg-open,x-www-browser,www-browser,wslview": executable file not found in $PATH
|
||||
|
||||
@@ -304,4 +304,3 @@ main() {
|
||||
}
|
||||
|
||||
main "$@"
|
||||
|
||||
|
||||
@@ -14,4 +14,3 @@
|
||||
#
|
||||
# Only add commits that are pure formatting changes (e.g. clang-format version changes, etc).
|
||||
8f1152d4a22287a35be2dde596e3cf86ace8054a # Increase column limit to 100
|
||||
|
||||
|
||||
@@ -65,6 +65,7 @@ long_ext_count = 10
|
||||
|
||||
class Target:
|
||||
"""Represents a single line read for a .ninja_log file."""
|
||||
|
||||
def __init__(self, start, end):
|
||||
"""Creates a target object by passing in the start/end times in seconds
|
||||
as a float."""
|
||||
@@ -94,9 +95,9 @@ class Target:
|
||||
"""
|
||||
# Allow for modest floating-point errors
|
||||
epsilon = 0.000002
|
||||
if (self.weighted_duration > self.Duration() + epsilon):
|
||||
print('%s > %s?' % (self.weighted_duration, self.Duration()))
|
||||
assert (self.weighted_duration <= self.Duration() + epsilon)
|
||||
if self.weighted_duration > self.Duration() + epsilon:
|
||||
print("%s > %s?" % (self.weighted_duration, self.Duration()))
|
||||
assert self.weighted_duration <= self.Duration() + epsilon
|
||||
return self.weighted_duration
|
||||
|
||||
def DescribeTargets(self):
|
||||
@@ -104,10 +105,10 @@ class Target:
|
||||
# Some build steps generate dozens of outputs - handle them sanely.
|
||||
# The max_length was chosen so that it can fit most of the long
|
||||
# single-target names, while minimizing word wrapping.
|
||||
result = ', '.join(self.targets)
|
||||
result = ", ".join(self.targets)
|
||||
max_length = 65
|
||||
if len(result) > max_length:
|
||||
result = result[:max_length] + '...'
|
||||
result = result[:max_length] + "..."
|
||||
return result
|
||||
|
||||
|
||||
@@ -121,12 +122,11 @@ def ReadTargets(log, show_all):
|
||||
# targets.
|
||||
if not header:
|
||||
return []
|
||||
assert header == '# ninja log v5\n', \
|
||||
'unrecognized ninja log version %r' % header
|
||||
assert header == "# ninja log v5\n", "unrecognized ninja log version %r" % header
|
||||
targets_dict = {}
|
||||
last_end_seen = 0.0
|
||||
for line in log:
|
||||
parts = line.strip().split('\t')
|
||||
parts = line.strip().split("\t")
|
||||
if len(parts) != 5:
|
||||
# If ninja.exe is rudely halted then the .ninja_log file may be
|
||||
# corrupt. Silently continue.
|
||||
@@ -165,17 +165,17 @@ def ReadTargets(log, show_all):
|
||||
def GetExtension(target, extra_patterns):
|
||||
"""Return the file extension that best represents a target.
|
||||
|
||||
For targets that generate multiple outputs it is important to return a
|
||||
consistent 'canonical' extension. Ultimately the goal is to group build steps
|
||||
by type."""
|
||||
For targets that generate multiple outputs it is important to return a
|
||||
consistent 'canonical' extension. Ultimately the goal is to group build steps
|
||||
by type."""
|
||||
for output in target.targets:
|
||||
if extra_patterns:
|
||||
for fn_pattern in extra_patterns.split(';'):
|
||||
if fnmatch.fnmatch(output, '*' + fn_pattern + '*'):
|
||||
for fn_pattern in extra_patterns.split(";"):
|
||||
if fnmatch.fnmatch(output, "*" + fn_pattern + "*"):
|
||||
return fn_pattern
|
||||
# Not a true extension, but a good grouping.
|
||||
if output.endswith('type_mappings'):
|
||||
extension = 'type_mappings'
|
||||
if output.endswith("type_mappings"):
|
||||
extension = "type_mappings"
|
||||
break
|
||||
|
||||
# Capture two extensions if present. For example: file.javac.jar should
|
||||
@@ -185,26 +185,26 @@ def GetExtension(target, extra_patterns):
|
||||
extension = ext2 + ext1 # Preserve the order in the file name.
|
||||
|
||||
if len(extension) == 0:
|
||||
extension = '(no extension found)'
|
||||
extension = "(no extension found)"
|
||||
|
||||
if ext1 in ['.pdb', '.dll', '.exe']:
|
||||
extension = 'PEFile (linking)'
|
||||
if ext1 in [".pdb", ".dll", ".exe"]:
|
||||
extension = "PEFile (linking)"
|
||||
# Make sure that .dll and .exe are grouped together and that the
|
||||
# .dll.lib files don't cause these to be listed as libraries
|
||||
break
|
||||
if ext1 in ['.so', '.TOC']:
|
||||
extension = '.so (linking)'
|
||||
if ext1 in [".so", ".TOC"]:
|
||||
extension = ".so (linking)"
|
||||
# Attempt to identify linking, avoid identifying as '.TOC'
|
||||
break
|
||||
# Make sure .obj files don't get categorized as mojo files
|
||||
if ext1 in ['.obj', '.o']:
|
||||
if ext1 in [".obj", ".o"]:
|
||||
break
|
||||
# Jars are the canonical output of java targets.
|
||||
if ext1 == '.jar':
|
||||
if ext1 == ".jar":
|
||||
break
|
||||
# Normalize all mojo related outputs to 'mojo'.
|
||||
if output.count('.mojom') > 0:
|
||||
extension = 'mojo'
|
||||
if output.count(".mojom") > 0:
|
||||
extension = "mojo"
|
||||
break
|
||||
return extension
|
||||
|
||||
@@ -229,8 +229,8 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
|
||||
if target.end > latest:
|
||||
latest = target.end
|
||||
total_cpu_time += target.Duration()
|
||||
task_start_stop_times.append((target.start, 'start', target))
|
||||
task_start_stop_times.append((target.end, 'stop', target))
|
||||
task_start_stop_times.append((target.start, "start", target))
|
||||
task_start_stop_times.append((target.end, "stop", target))
|
||||
length = latest - earliest
|
||||
weighted_total = 0.0
|
||||
|
||||
@@ -256,10 +256,10 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
|
||||
if num_running > 0:
|
||||
# Update the total weighted time up to this moment.
|
||||
last_weighted_time += (time - last_time) / float(num_running)
|
||||
if action_name == 'start':
|
||||
if action_name == "start":
|
||||
# Record the total weighted task time when this task starts.
|
||||
running_tasks[target] = last_weighted_time
|
||||
if action_name == 'stop':
|
||||
if action_name == "stop":
|
||||
# Record the change in the total weighted task time while this task
|
||||
# ran.
|
||||
weighted_duration = last_weighted_time - running_tasks[target]
|
||||
@@ -267,24 +267,27 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
|
||||
weighted_total += weighted_duration
|
||||
del running_tasks[target]
|
||||
last_time = time
|
||||
assert (len(running_tasks) == 0)
|
||||
assert len(running_tasks) == 0
|
||||
|
||||
# Warn if the sum of weighted times is off by more than half a second.
|
||||
if abs(length - weighted_total) > 500:
|
||||
print('Warning: Possible corrupt ninja log, results may be '
|
||||
'untrustworthy. Length = %.3f, weighted total = %.3f' %
|
||||
(length, weighted_total))
|
||||
print(
|
||||
"Warning: Possible corrupt ninja log, results may be "
|
||||
"untrustworthy. Length = %.3f, weighted total = %.3f"
|
||||
% (length, weighted_total)
|
||||
)
|
||||
|
||||
# Print the slowest build steps:
|
||||
print(' Longest build steps:')
|
||||
print(" Longest build steps:")
|
||||
if elapsed_time_sorting:
|
||||
entries.sort(key=lambda x: x.Duration())
|
||||
else:
|
||||
entries.sort(key=lambda x: x.WeightedDuration())
|
||||
for target in entries[-long_count:]:
|
||||
print(' %8.1f weighted s to build %s (%.1f s elapsed time)' %
|
||||
(target.WeightedDuration(), target.DescribeTargets(),
|
||||
target.Duration()))
|
||||
print(
|
||||
" %8.1f weighted s to build %s (%.1f s elapsed time)"
|
||||
% (target.WeightedDuration(), target.DescribeTargets(), target.Duration())
|
||||
)
|
||||
|
||||
# Sum up the time by file extension/type of the output file
|
||||
count_by_ext = {}
|
||||
@@ -293,51 +296,56 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
|
||||
# Scan through all of the targets to build up per-extension statistics.
|
||||
for target in entries:
|
||||
extension = GetExtension(target, extra_step_types)
|
||||
time_by_ext[extension] = time_by_ext.get(extension,
|
||||
0) + target.Duration()
|
||||
weighted_time_by_ext[extension] = weighted_time_by_ext.get(
|
||||
extension, 0) + target.WeightedDuration()
|
||||
time_by_ext[extension] = time_by_ext.get(extension, 0) + target.Duration()
|
||||
weighted_time_by_ext[extension] = (
|
||||
weighted_time_by_ext.get(extension, 0) + target.WeightedDuration()
|
||||
)
|
||||
count_by_ext[extension] = count_by_ext.get(extension, 0) + 1
|
||||
|
||||
print(' Time by build-step type:')
|
||||
print(" Time by build-step type:")
|
||||
# Copy to a list with extension name and total time swapped, to (time, ext)
|
||||
if elapsed_time_sorting:
|
||||
weighted_time_by_ext_sorted = sorted(
|
||||
(y, x) for (x, y) in time_by_ext.items())
|
||||
weighted_time_by_ext_sorted = sorted((y, x) for (x, y) in time_by_ext.items())
|
||||
else:
|
||||
weighted_time_by_ext_sorted = sorted(
|
||||
(y, x) for (x, y) in weighted_time_by_ext.items())
|
||||
(y, x) for (x, y) in weighted_time_by_ext.items()
|
||||
)
|
||||
# Print the slowest build target types:
|
||||
for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]:
|
||||
print(
|
||||
' %8.1f s weighted time to generate %d %s files '
|
||||
'(%1.1f s elapsed time sum)' %
|
||||
(time, count_by_ext[extension], extension, time_by_ext[extension]))
|
||||
" %8.1f s weighted time to generate %d %s files "
|
||||
"(%1.1f s elapsed time sum)"
|
||||
% (time, count_by_ext[extension], extension, time_by_ext[extension])
|
||||
)
|
||||
|
||||
print(' %.1f s weighted time (%.1f s elapsed time sum, %1.1fx '
|
||||
'parallelism)' %
|
||||
(length, total_cpu_time, total_cpu_time * 1.0 / length))
|
||||
print(' %d build steps completed, average of %1.2f/s' %
|
||||
(len(entries), len(entries) / (length)))
|
||||
print(
|
||||
" %.1f s weighted time (%.1f s elapsed time sum, %1.1fx "
|
||||
"parallelism)" % (length, total_cpu_time, total_cpu_time * 1.0 / length)
|
||||
)
|
||||
print(
|
||||
" %d build steps completed, average of %1.2f/s"
|
||||
% (len(entries), len(entries) / (length))
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
log_file = '.ninja_log'
|
||||
metrics_file = 'siso_metrics.json'
|
||||
log_file = ".ninja_log"
|
||||
metrics_file = "siso_metrics.json"
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-C', dest='build_directory', help='Build directory.')
|
||||
parser.add_argument("-C", dest="build_directory", help="Build directory.")
|
||||
parser.add_argument(
|
||||
'-s',
|
||||
'--step-types',
|
||||
help='semicolon separated fnmatch patterns for build-step grouping')
|
||||
"-s",
|
||||
"--step-types",
|
||||
help="semicolon separated fnmatch patterns for build-step grouping",
|
||||
)
|
||||
parser.add_argument(
|
||||
'-e',
|
||||
'--elapsed_time_sorting',
|
||||
"-e",
|
||||
"--elapsed_time_sorting",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help='Sort output by elapsed time instead of weighted time')
|
||||
parser.add_argument('--log-file',
|
||||
help="specific ninja log file to analyze.")
|
||||
action="store_true",
|
||||
help="Sort output by elapsed time instead of weighted time",
|
||||
)
|
||||
parser.add_argument("--log-file", help="specific ninja log file to analyze.")
|
||||
args, _extra_args = parser.parse_known_args()
|
||||
if args.build_directory:
|
||||
log_file = os.path.join(args.build_directory, log_file)
|
||||
@@ -348,34 +356,35 @@ def main():
|
||||
# Offer a convenient way to add extra step types automatically,
|
||||
# including when this script is run by autoninja. get() returns None if
|
||||
# the variable isn't set.
|
||||
args.step_types = os.environ.get('chromium_step_types')
|
||||
args.step_types = os.environ.get("chromium_step_types")
|
||||
if args.step_types:
|
||||
# Make room for the extra build types.
|
||||
global long_ext_count
|
||||
long_ext_count += len(args.step_types.split(';'))
|
||||
long_ext_count += len(args.step_types.split(";"))
|
||||
|
||||
if os.path.exists(metrics_file):
|
||||
# Automatically handle summarizing siso builds.
|
||||
cmd = ['siso.bat' if 'win32' in sys.platform else 'siso']
|
||||
cmd.extend(['metrics', 'summary'])
|
||||
cmd = ["siso.bat" if "win32" in sys.platform else "siso"]
|
||||
cmd.extend(["metrics", "summary"])
|
||||
if args.build_directory:
|
||||
cmd.extend(['-C', args.build_directory])
|
||||
cmd.extend(["-C", args.build_directory])
|
||||
if args.step_types:
|
||||
cmd.extend(['--step_types', args.step_types])
|
||||
cmd.extend(["--step_types", args.step_types])
|
||||
if args.elapsed_time_sorting:
|
||||
cmd.append('--elapsed_time_sorting')
|
||||
cmd.append("--elapsed_time_sorting")
|
||||
subprocess.run(cmd)
|
||||
else:
|
||||
try:
|
||||
with open(log_file, 'r') as log:
|
||||
with open(log_file, "r") as log:
|
||||
entries = ReadTargets(log, False)
|
||||
if entries:
|
||||
SummarizeEntries(entries, args.step_types,
|
||||
args.elapsed_time_sorting)
|
||||
SummarizeEntries(
|
||||
entries, args.step_types, args.elapsed_time_sorting
|
||||
)
|
||||
except IOError:
|
||||
print('Log file %r not found, no build summary created.' % log_file)
|
||||
print("Log file %r not found, no build summary created." % log_file)
|
||||
return errno.ENOENT
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -37,5 +37,5 @@ if [ $requests_diff -eq 0 ]; then
|
||||
else
|
||||
hit_rate=$(awk -v hits=$hits_diff -v requests=$requests_diff 'BEGIN {printf "%.2f", hits/requests * 100}')
|
||||
echo "sccache hit rate: $hit_rate%" >&2
|
||||
echo "$hit_rate"
|
||||
echo "$hit_rate"
|
||||
fi
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Detect the langauge standards supported by the current compilers.
|
||||
# Detect the language standards supported by the current compilers.
|
||||
#
|
||||
# Usage: detect_supported_cxx_standards(<var_prefix> <lang> <standards>)
|
||||
#
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
# Passes all args directly to execute_process while setting up the following
|
||||
# results variables and propogating them to the caller's scope:
|
||||
# results variables and propagating them to the caller's scope:
|
||||
#
|
||||
# - nvbench_process_exit_code
|
||||
# - nvbench_process_stdout
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
// a potential macro collision and halts.
|
||||
//
|
||||
// Hacky way to build a string, but it works on all tested platforms.
|
||||
#define NVBench_MACRO_CHECK(MACRO, HEADER) \
|
||||
NVBench_MACRO_CHECK_IMPL(Identifier MACRO should not be used from NVBench \
|
||||
headers due to conflicts with HEADER macros.)
|
||||
#define NVBench_MACRO_CHECK(MACRO, HEADER) \
|
||||
NVBench_MACRO_CHECK_IMPL( \
|
||||
Identifier MACRO should not be used from NVBench headers due to conflicts with HEADER macros.)
|
||||
|
||||
// Use raw platform checks instead of the NVBench_HOST_COMPILER macros since we
|
||||
// don't want to #include any headers other than the one being tested.
|
||||
@@ -34,8 +34,8 @@
|
||||
// library implementations unconditionally `#undef` these macros, which then
|
||||
// causes random failures later.
|
||||
// Leaving these commented out as a warning: Here be dragons.
|
||||
//#define min(...) NVBench_MACRO_CHECK('min', windows.h)
|
||||
//#define max(...) NVBench_MACRO_CHECK('max', windows.h)
|
||||
// #define min(...) NVBench_MACRO_CHECK('min', windows.h)
|
||||
// #define max(...) NVBench_MACRO_CHECK('max', windows.h)
|
||||
|
||||
// termios.h conflicts (NVIDIA/thrust#1547)
|
||||
#define B0 NVBench_MACRO_CHECK("B0", termios.h)
|
||||
|
||||
@@ -90,7 +90,7 @@
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--stopping-criterion <criterion>`
|
||||
* After `--min-samples` is satisfied, use `<criterion>` to detect if enough
|
||||
* After `--min-samples` is satisfied, use `<criterion>` to detect if enough
|
||||
samples were collected.
|
||||
* Only applies to Cold measurements.
|
||||
* Default is stdrel (`--stopping-criterion stdrel`)
|
||||
|
||||
@@ -24,37 +24,33 @@
|
||||
template <int ItemsPerThread>
|
||||
__global__ void kernel(std::size_t stride,
|
||||
std::size_t elements,
|
||||
const nvbench::int32_t * __restrict__ in,
|
||||
const nvbench::int32_t *__restrict__ in,
|
||||
nvbench::int32_t *__restrict__ out)
|
||||
{
|
||||
const std::size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const std::size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const std::size_t step = gridDim.x * blockDim.x;
|
||||
|
||||
for (std::size_t i = stride * tid;
|
||||
i < stride * elements;
|
||||
i += stride * step)
|
||||
for (std::size_t i = stride * tid; i < stride * elements; i += stride * step)
|
||||
{
|
||||
for (int j = 0; j < ItemsPerThread; j++)
|
||||
{
|
||||
const auto read_id = (ItemsPerThread * i + j) % elements;
|
||||
const auto read_id = (ItemsPerThread * i + j) % elements;
|
||||
const auto write_id = tid + j * elements;
|
||||
out[write_id] = in[read_id];
|
||||
out[write_id] = in[read_id];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// `throughput_bench` copies a 128 MiB buffer of int32_t, and reports throughput
|
||||
// and cache hit rates.
|
||||
//
|
||||
// Calling state.collect_*() enables particular metric collection if nvbench
|
||||
// was built with CUPTI support (CMake option: -DNVBench_ENABLE_CUPTI=ON).
|
||||
template <int ItemsPerThread>
|
||||
void throughput_bench(nvbench::state &state,
|
||||
nvbench::type_list<nvbench::enum_type<ItemsPerThread>>)
|
||||
void throughput_bench(nvbench::state &state, nvbench::type_list<nvbench::enum_type<ItemsPerThread>>)
|
||||
{
|
||||
// Allocate input data:
|
||||
const std::size_t stride = static_cast<std::size_t>(state.get_int64("Stride"));
|
||||
const std::size_t stride = static_cast<std::size_t>(state.get_int64("Stride"));
|
||||
const std::size_t elements = 128 * 1024 * 1024 / sizeof(nvbench::int32_t);
|
||||
thrust::device_vector<nvbench::int32_t> input(elements);
|
||||
thrust::device_vector<nvbench::int32_t> output(elements * ItemsPerThread);
|
||||
@@ -72,12 +68,11 @@ void throughput_bench(nvbench::state &state,
|
||||
static_cast<int>((elements + threads_in_block - 1) / threads_in_block);
|
||||
|
||||
state.exec([&](nvbench::launch &launch) {
|
||||
kernel<ItemsPerThread>
|
||||
<<<blocks_in_grid, threads_in_block, 0, launch.get_stream()>>>(
|
||||
stride,
|
||||
elements,
|
||||
thrust::raw_pointer_cast(input.data()),
|
||||
thrust::raw_pointer_cast(output.data()));
|
||||
kernel<ItemsPerThread><<<blocks_in_grid, threads_in_block, 0, launch.get_stream()>>>(
|
||||
stride,
|
||||
elements,
|
||||
thrust::raw_pointer_cast(input.data()),
|
||||
thrust::raw_pointer_cast(output.data()));
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -71,18 +71,16 @@ void copy_sweep_grid_shape(nvbench::state &state)
|
||||
thrust::device_vector<nvbench::int32_t> in(num_values, 0);
|
||||
thrust::device_vector<nvbench::int32_t> out(num_values, 0);
|
||||
|
||||
state.exec(
|
||||
[block_size,
|
||||
num_blocks,
|
||||
num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
(void) num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<num_blocks, block_size, 0, launch.get_stream()>>>(
|
||||
in_ptr,
|
||||
out_ptr,
|
||||
num_values);
|
||||
});
|
||||
state.exec([block_size,
|
||||
num_blocks,
|
||||
num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<num_blocks, block_size, 0, launch.get_stream()>>>(in_ptr,
|
||||
out_ptr,
|
||||
num_values);
|
||||
});
|
||||
}
|
||||
NVBENCH_BENCH(copy_sweep_grid_shape)
|
||||
// Every second power of two from 64->1024:
|
||||
@@ -107,15 +105,12 @@ void copy_type_sweep(nvbench::state &state, nvbench::type_list<ValueType>)
|
||||
thrust::device_vector<ValueType> in(num_values, 0);
|
||||
thrust::device_vector<ValueType> out(num_values, 0);
|
||||
|
||||
state.exec(
|
||||
[num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
(void) num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr,
|
||||
out_ptr,
|
||||
num_values);
|
||||
});
|
||||
state.exec([num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values);
|
||||
});
|
||||
}
|
||||
// Define a type_list to use for the type axis:
|
||||
using cts_types = nvbench::type_list<nvbench::uint8_t,
|
||||
@@ -131,11 +126,10 @@ NVBENCH_BENCH_TYPES(copy_type_sweep, NVBENCH_TYPE_AXES(cts_types));
|
||||
// Convert 64 MiB of InputTypes to OutputTypes, represented with various
|
||||
// value_types.
|
||||
template <typename InputType, typename OutputType>
|
||||
void copy_type_conversion_sweep(nvbench::state &state,
|
||||
nvbench::type_list<InputType, OutputType>)
|
||||
void copy_type_conversion_sweep(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
|
||||
{
|
||||
// Optional: Skip narrowing conversions.
|
||||
if constexpr(sizeof(InputType) > sizeof(OutputType))
|
||||
if constexpr (sizeof(InputType) > sizeof(OutputType))
|
||||
{
|
||||
state.skip("Narrowing conversion: sizeof(InputType) > sizeof(OutputType).");
|
||||
return;
|
||||
@@ -154,15 +148,12 @@ void copy_type_conversion_sweep(nvbench::state &state,
|
||||
thrust::device_vector<InputType> in(num_values, 0);
|
||||
thrust::device_vector<OutputType> out(num_values, 0);
|
||||
|
||||
state.exec(
|
||||
[num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
(void) num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr,
|
||||
out_ptr,
|
||||
num_values);
|
||||
});
|
||||
state.exec([num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values);
|
||||
});
|
||||
}
|
||||
// Optional: Skip when InputType == OutputType. This approach avoids
|
||||
// instantiating the benchmark at all.
|
||||
@@ -178,6 +169,5 @@ using ctcs_types = nvbench::type_list<nvbench::int8_t,
|
||||
nvbench::float32_t,
|
||||
nvbench::int64_t,
|
||||
nvbench::float64_t>;
|
||||
NVBENCH_BENCH_TYPES(copy_type_conversion_sweep,
|
||||
NVBENCH_TYPE_AXES(ctcs_types, ctcs_types))
|
||||
NVBENCH_BENCH_TYPES(copy_type_conversion_sweep, NVBENCH_TYPE_AXES(ctcs_types, ctcs_types))
|
||||
.set_type_axes_names({"In", "Out"});
|
||||
|
||||
@@ -36,10 +36,7 @@ public:
|
||||
|
||||
protected:
|
||||
// Set up the criterion in the `do_initialize()` method:
|
||||
virtual void do_initialize() override
|
||||
{
|
||||
m_num_samples = 0;
|
||||
}
|
||||
virtual void do_initialize() override { m_num_samples = 0; }
|
||||
|
||||
// Process new measurements in the `do_add_measurement()` method:
|
||||
virtual void do_add_measurement(nvbench::float64_t /* measurement */) override
|
||||
@@ -52,7 +49,6 @@ protected:
|
||||
{
|
||||
return m_num_samples >= m_params.get_int64("max-samples");
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// Register the criterion with NVBench:
|
||||
@@ -71,7 +67,7 @@ void throughput_bench(nvbench::state &state)
|
||||
state.add_global_memory_writes<nvbench::int32_t>(num_values);
|
||||
|
||||
state.exec(nvbench::exec_tag::no_batch, [&input, &output, num_values](nvbench::launch &launch) {
|
||||
(void) num_values; // clang thinks this is unused...
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
|
||||
thrust::raw_pointer_cast(input.data()),
|
||||
thrust::raw_pointer_cast(output.data()),
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
*/
|
||||
|
||||
#include <nvbench/nvbench.cuh>
|
||||
|
||||
#include <nvbench/test_kernels.cuh>
|
||||
|
||||
// Enum to use as parameter axis:
|
||||
@@ -68,12 +67,10 @@ void runtime_enum_sweep_string(nvbench::state &state)
|
||||
// Create inputs, etc, configure runtime kernel parameters, etc.
|
||||
|
||||
// Just a dummy kernel.
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
NVBENCH_BENCH(runtime_enum_sweep_string)
|
||||
.add_string_axis("MyEnum", {"A", "B", "C"});
|
||||
NVBENCH_BENCH(runtime_enum_sweep_string).add_string_axis("MyEnum", {"A", "B", "C"});
|
||||
|
||||
//==============================================================================
|
||||
// Sweep through enum values at runtime using an int64 axis.
|
||||
@@ -97,9 +94,8 @@ void runtime_enum_sweep_int64(nvbench::state &state)
|
||||
// Create inputs, etc, configure runtime kernel parameters, etc.
|
||||
|
||||
// Just a dummy kernel.
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
NVBENCH_BENCH(runtime_enum_sweep_int64)
|
||||
.add_int64_axis("MyEnum",
|
||||
@@ -178,12 +174,10 @@ void compile_time_enum_sweep(nvbench::state &state,
|
||||
// Template parameters, static dispatch, etc.
|
||||
|
||||
// Just a dummy kernel.
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
using MyEnumList =
|
||||
nvbench::enum_type_list<MyEnum::ValueA, MyEnum::ValueB, MyEnum::ValueC>;
|
||||
using MyEnumList = nvbench::enum_type_list<MyEnum::ValueA, MyEnum::ValueB, MyEnum::ValueC>;
|
||||
NVBENCH_BENCH_TYPES(compile_time_enum_sweep, NVBENCH_TYPE_AXES(MyEnumList))
|
||||
.set_type_axes_names({"MyEnum"});
|
||||
|
||||
@@ -199,16 +193,14 @@ NVBENCH_BENCH_TYPES(compile_time_enum_sweep, NVBENCH_TYPE_AXES(MyEnumList))
|
||||
// * `-12` (struct std::integral_constant<int,-12>)
|
||||
// ```
|
||||
template <nvbench::int32_t IntValue>
|
||||
void compile_time_int_sweep(nvbench::state &state,
|
||||
nvbench::type_list<nvbench::enum_type<IntValue>>)
|
||||
void compile_time_int_sweep(nvbench::state &state, nvbench::type_list<nvbench::enum_type<IntValue>>)
|
||||
{
|
||||
// Use IntValue in compile time contexts.
|
||||
// Template parameters, static dispatch, etc.
|
||||
|
||||
// Just a dummy kernel.
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
using MyInts = nvbench::enum_type_list<0, 16, 4096, -12>;
|
||||
NVBENCH_BENCH_TYPES(compile_time_int_sweep, NVBENCH_TYPE_AXES(MyInts))
|
||||
|
||||
@@ -53,9 +53,7 @@ void sequence_bench(nvbench::state &state)
|
||||
|
||||
// nvbench::exec_tag::sync indicates that this will implicitly sync:
|
||||
state.exec(nvbench::exec_tag::sync, [&data](nvbench::launch &launch) {
|
||||
thrust::sequence(thrust::device.on(launch.get_stream()),
|
||||
data.begin(),
|
||||
data.end());
|
||||
thrust::sequence(thrust::device.on(launch.get_stream()), data.begin(), data.end());
|
||||
});
|
||||
}
|
||||
NVBENCH_BENCH(sequence_bench);
|
||||
|
||||
@@ -23,8 +23,8 @@
|
||||
|
||||
// Thrust simplifies memory management, etc:
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/sequence.h>
|
||||
|
||||
// mod2_inplace performs an in-place mod2 over every element in `data`. `data`
|
||||
@@ -54,7 +54,7 @@ void mod2_inplace(nvbench::state &state)
|
||||
state.exec(nvbench::exec_tag::timer,
|
||||
// Lambda now takes a `timer` argument:
|
||||
[&input, &data, num_values](nvbench::launch &launch, auto &timer) {
|
||||
(void) num_values; // clang thinks this is unused...
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
|
||||
// Reset working data:
|
||||
thrust::copy(thrust::device.on(launch.get_stream()),
|
||||
|
||||
@@ -72,14 +72,12 @@ NVBENCH_BENCH(runtime_skip)
|
||||
// Two type axes are swept, but configurations where InputType == OutputType are
|
||||
// skipped.
|
||||
template <typename InputType, typename OutputType>
|
||||
void skip_overload(nvbench::state &state,
|
||||
nvbench::type_list<InputType, OutputType>)
|
||||
void skip_overload(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
|
||||
{
|
||||
// This is a contrived example that focuses on the skip overloads, so this is
|
||||
// just a sleep kernel:
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
// Overload of skip_overload that is called when InputType == OutputType.
|
||||
template <typename T>
|
||||
@@ -107,9 +105,8 @@ skip_sfinae(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
|
||||
{
|
||||
// This is a contrived example that focuses on the skip overloads, so this is
|
||||
// just a sleep kernel:
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
// Enable this overload if InputType is larger than OutputType
|
||||
template <typename InputType, typename OutputType>
|
||||
@@ -119,10 +116,8 @@ skip_sfinae(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
|
||||
state.skip("sizeof(InputType) > sizeof(OutputType).");
|
||||
}
|
||||
// The same type_list is used for both inputs/outputs.
|
||||
using sn_types = nvbench::type_list<nvbench::int8_t,
|
||||
nvbench::int16_t,
|
||||
nvbench::int32_t,
|
||||
nvbench::int64_t>;
|
||||
using sn_types =
|
||||
nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;
|
||||
// Setup benchmark:
|
||||
NVBENCH_BENCH_TYPES(skip_sfinae, NVBENCH_TYPE_AXES(sn_types, sn_types))
|
||||
.set_type_axes_names({"In", "Out"});
|
||||
|
||||
@@ -52,7 +52,7 @@ void stream_bench(nvbench::state &state)
|
||||
state.set_cuda_stream(nvbench::make_cuda_stream_view(default_stream));
|
||||
|
||||
state.exec([&input, &output, num_values](nvbench::launch &) {
|
||||
(void) num_values; // clang thinks this is unused...
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
copy(thrust::raw_pointer_cast(input.data()),
|
||||
thrust::raw_pointer_cast(output.data()),
|
||||
num_values);
|
||||
|
||||
@@ -26,8 +26,8 @@
|
||||
void summary_example(nvbench::state &state)
|
||||
{
|
||||
// Fetch parameters and compute duration in seconds:
|
||||
const auto ms = static_cast<nvbench::float64_t>(state.get_int64("ms"));
|
||||
const auto us = static_cast<nvbench::float64_t>(state.get_int64("us"));
|
||||
const auto ms = static_cast<nvbench::float64_t>(state.get_int64("ms"));
|
||||
const auto us = static_cast<nvbench::float64_t>(state.get_int64("us"));
|
||||
const auto duration = ms * 1e-3 + us * 1e-6;
|
||||
|
||||
// Add a new column to the summary table with the derived duration used by the benchmark.
|
||||
|
||||
@@ -51,7 +51,7 @@ void throughput_bench(nvbench::state &state)
|
||||
state.add_global_memory_writes<nvbench::int32_t>(num_values);
|
||||
|
||||
state.exec([&input, &output, num_values](nvbench::launch &launch) {
|
||||
(void) num_values; // clang thinks this is unused...
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
|
||||
thrust::raw_pointer_cast(input.data()),
|
||||
thrust::raw_pointer_cast(output.data()),
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
/*
|
||||
* Copyright 2021 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
* Copyright 2021 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/main.cuh>
|
||||
|
||||
@@ -24,7 +24,7 @@ int main(int argc, char const *const *argv)
|
||||
try
|
||||
{
|
||||
// If no args, substitute a new argv that prints the version
|
||||
std::vector<const char*> alt_argv;
|
||||
std::vector<const char *> alt_argv;
|
||||
if (argc == 1)
|
||||
{
|
||||
alt_argv.push_back("--version");
|
||||
@@ -36,7 +36,7 @@ try
|
||||
NVBENCH_CUDA_CALL(cudaDeviceReset());
|
||||
return 0;
|
||||
}
|
||||
catch (std::exception & e)
|
||||
catch (std::exception &e)
|
||||
{
|
||||
std::cerr << "\nNVBench encountered an error:\n\n" << e.what() << "\n";
|
||||
return 1;
|
||||
|
||||
@@ -19,13 +19,13 @@
|
||||
#include <nvbench/axes_metadata.cuh>
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <fmt/ranges.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <fmt/ranges.h>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "axis_base.cuh"
|
||||
#include <nvbench/axis_base.cuh>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
@@ -18,9 +18,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
|
||||
#include <nvbench/axes_metadata.cuh>
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/runner.cuh>
|
||||
#include <nvbench/type_list.cuh>
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ struct benchmark_manager
|
||||
* benchmarks should be done here to avoid creating a CUDA context before we configure the CUDA
|
||||
* environment in `main`.
|
||||
*/
|
||||
void initialize();
|
||||
void initialize();
|
||||
|
||||
/**
|
||||
* Register a new benchmark.
|
||||
|
||||
@@ -17,9 +17,8 @@
|
||||
*/
|
||||
|
||||
#include <nvbench/benchmark_manager.cuh>
|
||||
|
||||
#include <nvbench/device_manager.cuh>
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
#include <nvbench/device_manager.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
@@ -37,8 +36,8 @@ benchmark_manager &benchmark_manager::get()
|
||||
|
||||
void benchmark_manager::initialize()
|
||||
{
|
||||
const auto& mgr = device_manager::get();
|
||||
for (auto& bench : m_benchmarks)
|
||||
const auto &mgr = device_manager::get();
|
||||
for (auto &bench : m_benchmarks)
|
||||
{
|
||||
if (!bench->get_is_cpu_only())
|
||||
{
|
||||
|
||||
@@ -17,12 +17,10 @@
|
||||
*/
|
||||
|
||||
#include <nvbench/blocking_kernel.cuh>
|
||||
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/cuda_stream.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <cuda/std/chrono>
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace nvbench
|
||||
@@ -40,14 +39,14 @@ public:
|
||||
/**
|
||||
* @return The singleton criterion_manager instance.
|
||||
*/
|
||||
static criterion_manager& get();
|
||||
static criterion_manager &get();
|
||||
|
||||
/**
|
||||
* Register a new stopping criterion.
|
||||
*/
|
||||
nvbench::stopping_criterion_base& add(std::unique_ptr<nvbench::stopping_criterion_base> criterion);
|
||||
nvbench::stopping_criterion_base& get_criterion(const std::string& name);
|
||||
const nvbench::stopping_criterion_base& get_criterion(const std::string& name) const;
|
||||
nvbench::stopping_criterion_base &add(std::unique_ptr<nvbench::stopping_criterion_base> criterion);
|
||||
nvbench::stopping_criterion_base &get_criterion(const std::string &name);
|
||||
const nvbench::stopping_criterion_base &get_criterion(const std::string &name) const;
|
||||
|
||||
using params_description = std::vector<std::pair<std::string, nvbench::named_values::type>>;
|
||||
params_description get_params_description() const;
|
||||
|
||||
@@ -41,7 +41,7 @@ criterion_manager &criterion_manager::get()
|
||||
return registry;
|
||||
}
|
||||
|
||||
stopping_criterion_base& criterion_manager::get_criterion(const std::string& name)
|
||||
stopping_criterion_base &criterion_manager::get_criterion(const std::string &name)
|
||||
{
|
||||
auto iter = m_map.find(name);
|
||||
if (iter == m_map.end())
|
||||
@@ -51,7 +51,8 @@ stopping_criterion_base& criterion_manager::get_criterion(const std::string& nam
|
||||
return *iter->second.get();
|
||||
}
|
||||
|
||||
const nvbench::stopping_criterion_base& criterion_manager::get_criterion(const std::string& name) const
|
||||
const nvbench::stopping_criterion_base &
|
||||
criterion_manager::get_criterion(const std::string &name) const
|
||||
{
|
||||
auto iter = m_map.find(name);
|
||||
if (iter == m_map.end())
|
||||
@@ -69,8 +70,7 @@ stopping_criterion_base &criterion_manager::add(std::unique_ptr<stopping_criteri
|
||||
|
||||
if (!success)
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error,
|
||||
"Stopping criterion \"{}\" is already registered.", name);
|
||||
NVBENCH_THROW(std::runtime_error, "Stopping criterion \"{}\" is already registered.", name);
|
||||
}
|
||||
|
||||
return *it->second.get();
|
||||
|
||||
@@ -16,14 +16,12 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/csv_printer.cuh>
|
||||
|
||||
#include <nvbench/axes_metadata.cuh>
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/csv_printer.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
|
||||
#include <nvbench/internal/table_builder.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
@@ -169,7 +167,10 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
std::size_t remaining = table.m_columns.size();
|
||||
for (const auto &col : table.m_columns)
|
||||
{
|
||||
fmt::format_to(std::back_inserter(buffer), "{}{}", col.rows[i], (--remaining == 0) ? "" : ",");
|
||||
fmt::format_to(std::back_inserter(buffer),
|
||||
"{}{}",
|
||||
col.rows[i],
|
||||
(--remaining == 0) ? "" : ",");
|
||||
}
|
||||
fmt::format_to(std::back_inserter(buffer), "\n");
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
*/
|
||||
|
||||
#include <nvbench/cupti_profiler.cuh>
|
||||
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
|
||||
@@ -54,7 +53,9 @@ void nvpw_call(const NVPA_Status status)
|
||||
{
|
||||
if (status != NVPA_STATUS_SUCCESS)
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error, "NVPW call returned error: {}", static_cast<std::underlying_type_t<NVPA_Status>>(status));
|
||||
NVBENCH_THROW(std::runtime_error,
|
||||
"NVPW call returned error: {}",
|
||||
static_cast<std::underlying_type_t<NVPA_Status>>(status));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,9 +18,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
#include <nvbench/detail/ring_buffer.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <vector>
|
||||
|
||||
@@ -38,7 +38,7 @@ class entropy_criterion final : public stopping_criterion_base
|
||||
nvbench::detail::ring_buffer<nvbench::float64_t> m_entropy_tracker{299};
|
||||
|
||||
// Used to avoid re-allocating temporary memory
|
||||
std::vector<nvbench::float64_t> m_probabilities;
|
||||
std::vector<nvbench::float64_t> m_probabilities;
|
||||
|
||||
nvbench::float64_t compute_entropy();
|
||||
|
||||
@@ -49,7 +49,6 @@ protected:
|
||||
virtual void do_initialize() override;
|
||||
virtual void do_add_measurement(nvbench::float64_t measurement) override;
|
||||
virtual bool do_is_finished() override;
|
||||
|
||||
};
|
||||
|
||||
} // namespace nvbench::detail
|
||||
|
||||
@@ -21,7 +21,6 @@
|
||||
|
||||
#include <cmath>
|
||||
|
||||
|
||||
namespace nvbench::detail
|
||||
{
|
||||
|
||||
@@ -40,7 +39,7 @@ void entropy_criterion::do_initialize()
|
||||
m_freq_tracker.clear();
|
||||
}
|
||||
|
||||
nvbench::float64_t entropy_criterion::compute_entropy()
|
||||
nvbench::float64_t entropy_criterion::compute_entropy()
|
||||
{
|
||||
const std::size_t n = m_freq_tracker.size();
|
||||
if (n == 0)
|
||||
@@ -70,15 +69,15 @@ void entropy_criterion::do_add_measurement(nvbench::float64_t measurement)
|
||||
m_total_cuda_time += measurement;
|
||||
|
||||
{
|
||||
auto key = measurement;
|
||||
auto key = measurement;
|
||||
constexpr bool bin_keys = false;
|
||||
|
||||
if (bin_keys)
|
||||
if (bin_keys)
|
||||
{
|
||||
const auto resolution_us = 0.5;
|
||||
const auto resulution_s = resolution_us / 1'000'000;
|
||||
const auto epsilon = resulution_s * 2;
|
||||
key = std::round(key / epsilon) * epsilon;
|
||||
const auto resulution_s = resolution_us / 1 '000' 000;
|
||||
const auto epsilon = resulution_s * 2;
|
||||
key = std::round(key / epsilon) * epsilon;
|
||||
}
|
||||
|
||||
// This approach is about 3x faster than `std::{unordered_,}map`
|
||||
@@ -120,7 +119,7 @@ bool entropy_criterion::do_is_finished()
|
||||
|
||||
const auto [slope, intercept] = statistics::compute_linear_regression(begin, end, mean);
|
||||
|
||||
if (statistics::slope2deg(slope) > m_params.get_float64("max-angle"))
|
||||
if (statistics::slope2deg(slope) > m_params.get_float64("max-angle"))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -25,13 +25,13 @@
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <limits>
|
||||
#include <thread>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
namespace nvbench::detail
|
||||
{
|
||||
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <nvbench/blocking_kernel.cuh>
|
||||
#include <nvbench/cpu_timer.cuh>
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
@@ -32,12 +30,13 @@
|
||||
#include <nvbench/exec_tag.cuh>
|
||||
#include <nvbench/launch.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "nvbench/types.cuh"
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
|
||||
@@ -19,13 +19,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/cpu_timer.cuh>
|
||||
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
|
||||
#include <nvbench/detail/statistics.cuh>
|
||||
#include <nvbench/exec_tag.cuh>
|
||||
#include <nvbench/launch.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
|
||||
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
|
||||
#include <nvbench/detail/statistics.cuh>
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -66,7 +65,7 @@ protected:
|
||||
nvbench::cpu_timer m_walltime_timer;
|
||||
|
||||
nvbench::criterion_params m_criterion_params;
|
||||
nvbench::stopping_criterion_base& m_stopping_criterion;
|
||||
nvbench::stopping_criterion_base &m_stopping_criterion;
|
||||
|
||||
bool m_run_once{false};
|
||||
|
||||
|
||||
@@ -24,11 +24,11 @@
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
namespace nvbench::detail
|
||||
{
|
||||
|
||||
@@ -36,7 +36,8 @@ measure_cpu_only_base::measure_cpu_only_base(state &exec_state)
|
||||
: m_state{exec_state}
|
||||
, m_launch(m_state.get_cuda_stream())
|
||||
, m_criterion_params{exec_state.get_criterion_params()}
|
||||
, m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(exec_state.get_stopping_criterion())}
|
||||
, m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(
|
||||
exec_state.get_stopping_criterion())}
|
||||
, m_run_once{exec_state.get_run_once()}
|
||||
, m_min_samples{exec_state.get_min_samples()}
|
||||
, m_skip_time{exec_state.get_skip_time()}
|
||||
@@ -72,7 +73,7 @@ void measure_cpu_only_base::run_trials_prologue() { m_walltime_timer.start(); }
|
||||
void measure_cpu_only_base::record_measurements()
|
||||
{
|
||||
// Update and record timers and counters:
|
||||
const auto cur_cpu_time = m_cpu_timer.get_duration();
|
||||
const auto cur_cpu_time = m_cpu_timer.get_duration();
|
||||
|
||||
m_min_cpu_time = std::min(m_min_cpu_time, cur_cpu_time);
|
||||
m_max_cpu_time = std::max(m_max_cpu_time, cur_cpu_time);
|
||||
@@ -188,8 +189,7 @@ void measure_cpu_only_base::generate_summaries()
|
||||
auto &summ = m_state.add_summary("nv/cpu_only/bw/global/bytes_per_second");
|
||||
summ.set_string("name", "GlobalMem BW");
|
||||
summ.set_string("hint", "byte_rate");
|
||||
summ.set_string("description",
|
||||
"Number of bytes read/written per second.");
|
||||
summ.set_string("description", "Number of bytes read/written per second.");
|
||||
summ.set_float64("value", avg_used_gmem_bw);
|
||||
}
|
||||
} // bandwidth
|
||||
@@ -210,9 +210,9 @@ void measure_cpu_only_base::generate_summaries()
|
||||
|
||||
if (m_max_time_exceeded)
|
||||
{
|
||||
const auto timeout = m_walltime_timer.get_duration();
|
||||
const auto timeout = m_walltime_timer.get_duration();
|
||||
const auto max_noise = m_criterion_params.get_float64("max-noise");
|
||||
const auto min_time = m_criterion_params.get_float64("min-time");
|
||||
const auto min_time = m_criterion_params.get_float64("min-time");
|
||||
|
||||
if (cpu_noise > max_noise)
|
||||
{
|
||||
|
||||
@@ -24,13 +24,12 @@
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/cuda_timer.cuh>
|
||||
#include <nvbench/cupti_profiler.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/exec_tag.cuh>
|
||||
#include <nvbench/launch.cuh>
|
||||
|
||||
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
|
||||
#include <nvbench/detail/l2flush.cuh>
|
||||
#include <nvbench/detail/statistics.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/exec_tag.cuh>
|
||||
#include <nvbench/launch.cuh>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
|
||||
@@ -16,9 +16,8 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/detail/measure_hot.cuh>
|
||||
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/detail/measure_hot.cuh>
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/printer_base.cuh>
|
||||
|
||||
@@ -19,12 +19,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/config.cuh>
|
||||
|
||||
#include <nvbench/detail/statistics.cuh>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <iterator>
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
|
||||
namespace nvbench::detail
|
||||
@@ -76,14 +75,14 @@ public:
|
||||
return temp;
|
||||
}
|
||||
|
||||
ring_buffer_iterator operator+(difference_type n) const
|
||||
{
|
||||
return ring_buffer_iterator(m_index + n, m_capacity, m_ptr);
|
||||
ring_buffer_iterator operator+(difference_type n) const
|
||||
{
|
||||
return ring_buffer_iterator(m_index + n, m_capacity, m_ptr);
|
||||
}
|
||||
|
||||
ring_buffer_iterator operator-(difference_type n) const
|
||||
{
|
||||
return ring_buffer_iterator(m_index - n, m_capacity, m_ptr);
|
||||
ring_buffer_iterator operator-(difference_type n) const
|
||||
{
|
||||
return ring_buffer_iterator(m_index - n, m_capacity, m_ptr);
|
||||
}
|
||||
|
||||
difference_type operator-(const ring_buffer_iterator &other) const
|
||||
@@ -121,13 +120,9 @@ private:
|
||||
std::size_t m_index{0};
|
||||
bool m_full{false};
|
||||
|
||||
std::size_t get_front_index() const
|
||||
{
|
||||
return m_full ? m_index : 0;
|
||||
}
|
||||
std::size_t get_front_index() const { return m_full ? m_index : 0; }
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Create a new ring buffer with the requested capacity.
|
||||
*/
|
||||
|
||||
@@ -16,15 +16,13 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/detail/state_generator.cuh>
|
||||
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/detail/state_generator.cuh>
|
||||
#include <nvbench/detail/transform_reduce.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/named_values.cuh>
|
||||
#include <nvbench/type_axis.cuh>
|
||||
|
||||
#include <nvbench/detail/transform_reduce.cuh>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <functional>
|
||||
@@ -165,7 +163,7 @@ void state_generator::build_axis_configs()
|
||||
config.set_string(axis_info.axis, axis.get_input_string(axis_info.index));
|
||||
}
|
||||
} // type_si
|
||||
} // type_axis_config generation
|
||||
} // type_axis_config generation
|
||||
|
||||
// non_type_axis_config generation
|
||||
{
|
||||
@@ -201,9 +199,9 @@ void state_generator::build_axis_configs()
|
||||
axes.get_string_axis(axis_info.axis).get_value(axis_info.index));
|
||||
break;
|
||||
} // switch (type)
|
||||
} // for (axis_info : current_indices)
|
||||
} // for non_type_sg configs
|
||||
} // non_type_axis_config generation
|
||||
} // for (axis_info : current_indices)
|
||||
} // for non_type_sg configs
|
||||
} // non_type_axis_config generation
|
||||
}
|
||||
|
||||
void state_generator::build_states()
|
||||
|
||||
@@ -26,12 +26,10 @@
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <cmath>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846
|
||||
#define M_PI 3.14159265358979323846
|
||||
#endif
|
||||
|
||||
namespace nvbench::detail::statistics
|
||||
@@ -154,7 +152,7 @@ nvbench::float64_t compute_r2(It first,
|
||||
|
||||
for (std::size_t i = 0; i < n; ++i, ++first)
|
||||
{
|
||||
const nvbench::float64_t y = *first;
|
||||
const nvbench::float64_t y = *first;
|
||||
const nvbench::float64_t y_pred = slope * static_cast<nvbench::float64_t>(i) + intercept;
|
||||
|
||||
ss_tot += (y - mean_y) * (y - mean_y);
|
||||
@@ -179,19 +177,10 @@ compute_r2(It first, It last, nvbench::float64_t slope, nvbench::float64_t inter
|
||||
return compute_r2(first, last, compute_mean(first, last), slope, intercept);
|
||||
}
|
||||
|
||||
inline nvbench::float64_t rad2deg(nvbench::float64_t rad)
|
||||
{
|
||||
return rad * 180.0 / M_PI;
|
||||
}
|
||||
inline nvbench::float64_t rad2deg(nvbench::float64_t rad) { return rad * 180.0 / M_PI; }
|
||||
|
||||
inline nvbench::float64_t slope2rad(nvbench::float64_t slope)
|
||||
{
|
||||
return std::atan2(slope, 1.0);
|
||||
}
|
||||
inline nvbench::float64_t slope2rad(nvbench::float64_t slope) { return std::atan2(slope, 1.0); }
|
||||
|
||||
inline nvbench::float64_t slope2deg(nvbench::float64_t slope)
|
||||
{
|
||||
return rad2deg(slope2rad(slope));
|
||||
}
|
||||
inline nvbench::float64_t slope2deg(nvbench::float64_t slope) { return rad2deg(slope2rad(slope)); }
|
||||
|
||||
} // namespace nvbench::detail::statistics
|
||||
|
||||
@@ -18,9 +18,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
#include <nvbench/detail/ring_buffer.cuh>
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <vector>
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ stdrel_criterion::stdrel_criterion()
|
||||
|
||||
void stdrel_criterion::do_initialize()
|
||||
{
|
||||
m_total_samples = 0;
|
||||
m_total_samples = 0;
|
||||
m_total_cuda_time = 0.0;
|
||||
m_cuda_times.clear();
|
||||
m_noise_tracker.clear();
|
||||
@@ -46,7 +46,7 @@ void stdrel_criterion::do_add_measurement(nvbench::float64_t measurement)
|
||||
const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
|
||||
m_cuda_times.cend(),
|
||||
mean_cuda_time);
|
||||
const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
|
||||
const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
|
||||
if (std::isfinite(cuda_rel_stdev))
|
||||
{
|
||||
m_noise_tracker.push_back(cuda_rel_stdev);
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#define NVBENCH_THROW(exception_type, format_str, ...) \
|
||||
|
||||
@@ -16,13 +16,13 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/cuda_stream.cuh>
|
||||
#include <nvbench/detail/timestamps_kernel.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
@@ -71,12 +71,11 @@ void timestamps_kernel::record(const nvbench::cuda_stream &stream)
|
||||
int num_sms = 0;
|
||||
|
||||
NVBENCH_CUDA_CALL(cudaGetDevice(&device_id));
|
||||
NVBENCH_CUDA_CALL(
|
||||
cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device_id));
|
||||
NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device_id));
|
||||
|
||||
get_timestamps_kernel<<<static_cast<unsigned int>(num_sms), 1, 0, stream.get_stream()>>>(
|
||||
m_device_timestamps,
|
||||
m_device_timestamps + 1);
|
||||
}
|
||||
|
||||
} // namespace nvbench
|
||||
} // namespace nvbench::detail
|
||||
|
||||
@@ -82,7 +82,7 @@ struct cartesian_product<nvbench::type_list<nvbench::type_list<T, Tail...>, TL,
|
||||
using tail_prod = typename detail::cartesian_product<nvbench::type_list<TL, TLTail...>>::type;
|
||||
using cur = typename detail::prepend_each<T, tail_prod>::type;
|
||||
using next = typename detail::cartesian_product<
|
||||
nvbench::type_list<nvbench::type_list<Tail...>, TL, TLTail...>>::type;
|
||||
nvbench::type_list<nvbench::type_list<Tail...>, TL, TLTail...>>::type;
|
||||
using type = decltype(detail::concat(cur{}, next{}));
|
||||
};
|
||||
|
||||
|
||||
@@ -16,11 +16,10 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/device_info.cuh>
|
||||
|
||||
#include <nvbench/config.cuh>
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/detail/device_scope.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/internal/nvml.cuh>
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
@@ -18,17 +18,16 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
#include <nvbench/config.cuh>
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/detail/device_scope.cuh>
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
#include <cstdint> // CHAR_BIT
|
||||
#include <stdexcept>
|
||||
#include <utility>
|
||||
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
// forward declare this for internal storage
|
||||
struct nvmlDevice_st;
|
||||
|
||||
@@ -16,13 +16,12 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/device_manager.cuh>
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/detail/device_scope.cuh>
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
#include <nvbench/device_manager.cuh>
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
@@ -45,13 +44,13 @@ device_manager::device_manager()
|
||||
}
|
||||
}
|
||||
|
||||
const nvbench::device_info &device_manager::get_device(int id)
|
||||
{
|
||||
if (id < 0)
|
||||
const nvbench::device_info &device_manager::get_device(int id)
|
||||
{
|
||||
if (id < 0)
|
||||
{
|
||||
NVBENCH_THROW(std::runtime_error, "Negative index: {}.", id);
|
||||
}
|
||||
return m_devices.at(static_cast<std::size_t>(id));
|
||||
return m_devices.at(static_cast<std::size_t>(id));
|
||||
}
|
||||
|
||||
} // namespace nvbench
|
||||
|
||||
@@ -101,10 +101,10 @@ using no_gpu_t = tag<nvbench::detail::exec_flag::no_gpu>;
|
||||
using no_batch_t = tag<nvbench::detail::exec_flag::no_batch>;
|
||||
using modifier_mask_t = tag<nvbench::detail::exec_flag::modifier_mask>;
|
||||
|
||||
using hot_t = tag<nvbench::detail::exec_flag::hot>;
|
||||
using cold_t = tag<nvbench::detail::exec_flag::cold>;
|
||||
using cpu_only_t = tag<nvbench::detail::exec_flag::cpu_only>;
|
||||
using measure_mask_t = tag<nvbench::detail::exec_flag::measure_mask>;
|
||||
using hot_t = tag<nvbench::detail::exec_flag::hot>;
|
||||
using cold_t = tag<nvbench::detail::exec_flag::cold>;
|
||||
using cpu_only_t = tag<nvbench::detail::exec_flag::cpu_only>;
|
||||
using measure_mask_t = tag<nvbench::detail::exec_flag::measure_mask>;
|
||||
|
||||
constexpr inline none_t none;
|
||||
constexpr inline timer_t timer;
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/axis_base.cuh>
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <vector>
|
||||
@@ -40,7 +39,10 @@ struct float64_axis final : public axis_base
|
||||
[[nodiscard]] nvbench::float64_t get_value(std::size_t i) const { return m_values[i]; }
|
||||
|
||||
private:
|
||||
std::unique_ptr<axis_base> do_clone() const final { return std::make_unique<float64_axis>(*this); }
|
||||
std::unique_ptr<axis_base> do_clone() const final
|
||||
{
|
||||
return std::make_unique<float64_axis>(*this);
|
||||
}
|
||||
std::size_t do_get_size() const final { return m_values.size(); }
|
||||
std::string do_get_input_string(std::size_t i) const final;
|
||||
std::string do_get_description(std::size_t i) const final;
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/axis_base.cuh>
|
||||
|
||||
#include <nvbench/flags.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
|
||||
@@ -16,9 +16,8 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/int64_axis.cuh>
|
||||
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
#include <nvbench/int64_axis.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/detail/transform_reduce.cuh>
|
||||
|
||||
#include <nvbench/internal/table_builder.cuh>
|
||||
|
||||
#include <fmt/color.h>
|
||||
|
||||
@@ -21,12 +21,12 @@
|
||||
#include <nvbench/config.cuh>
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#ifdef NVBENCH_HAS_NVML
|
||||
#include <nvml.h>
|
||||
#endif // NVBENCH_HAS_NVML
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
namespace nvbench::nvml
|
||||
@@ -38,6 +38,7 @@ struct NVMLLifetimeManager
|
||||
{
|
||||
NVMLLifetimeManager();
|
||||
~NVMLLifetimeManager();
|
||||
|
||||
private:
|
||||
bool m_inited{false};
|
||||
};
|
||||
|
||||
@@ -16,24 +16,22 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/json_printer.cuh>
|
||||
|
||||
#include <nvbench/axes_metadata.cuh>
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/config.cuh>
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
#include <nvbench/device_info.cuh>
|
||||
#include <nvbench/device_manager.cuh>
|
||||
#include <nvbench/git_revision.cuh>
|
||||
#include <nvbench/json_printer.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
#include <nvbench/version.cuh>
|
||||
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <iterator>
|
||||
@@ -105,7 +103,7 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values)
|
||||
default:
|
||||
NVBENCH_THROW(std::runtime_error, "{}", "Unrecognized value type.");
|
||||
} // end switch (value type)
|
||||
} // end foreach value name
|
||||
} // end foreach value name
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
@@ -225,27 +223,26 @@ static void add_devices_section(nlohmann::ordered_json &root)
|
||||
auto &devices = root["devices"];
|
||||
for (const auto &dev_info : nvbench::device_manager::get().get_devices())
|
||||
{
|
||||
auto &device = devices.emplace_back();
|
||||
device["id"] = dev_info.get_id();
|
||||
device["name"] = dev_info.get_name();
|
||||
device["sm_version"] = dev_info.get_sm_version();
|
||||
device["ptx_version"] = dev_info.get_ptx_version();
|
||||
device["sm_default_clock_rate"] = dev_info.get_sm_default_clock_rate();
|
||||
device["number_of_sms"] = dev_info.get_number_of_sms();
|
||||
device["max_blocks_per_sm"] = dev_info.get_max_blocks_per_sm();
|
||||
device["max_threads_per_sm"] = dev_info.get_max_threads_per_sm();
|
||||
device["max_threads_per_block"] = dev_info.get_max_threads_per_block();
|
||||
device["registers_per_sm"] = dev_info.get_registers_per_sm();
|
||||
device["registers_per_block"] = dev_info.get_registers_per_block();
|
||||
device["global_memory_size"] = dev_info.get_global_memory_size();
|
||||
device["global_memory_bus_peak_clock_rate"] =
|
||||
dev_info.get_global_memory_bus_peak_clock_rate();
|
||||
device["global_memory_bus_width"] = dev_info.get_global_memory_bus_width();
|
||||
device["global_memory_bus_bandwidth"] = dev_info.get_global_memory_bus_bandwidth();
|
||||
device["l2_cache_size"] = dev_info.get_l2_cache_size();
|
||||
device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm();
|
||||
device["shared_memory_per_block"] = dev_info.get_shared_memory_per_block();
|
||||
device["ecc_state"] = dev_info.get_ecc_state();
|
||||
auto &device = devices.emplace_back();
|
||||
device["id"] = dev_info.get_id();
|
||||
device["name"] = dev_info.get_name();
|
||||
device["sm_version"] = dev_info.get_sm_version();
|
||||
device["ptx_version"] = dev_info.get_ptx_version();
|
||||
device["sm_default_clock_rate"] = dev_info.get_sm_default_clock_rate();
|
||||
device["number_of_sms"] = dev_info.get_number_of_sms();
|
||||
device["max_blocks_per_sm"] = dev_info.get_max_blocks_per_sm();
|
||||
device["max_threads_per_sm"] = dev_info.get_max_threads_per_sm();
|
||||
device["max_threads_per_block"] = dev_info.get_max_threads_per_block();
|
||||
device["registers_per_sm"] = dev_info.get_registers_per_sm();
|
||||
device["registers_per_block"] = dev_info.get_registers_per_block();
|
||||
device["global_memory_size"] = dev_info.get_global_memory_size();
|
||||
device["global_memory_bus_peak_clock_rate"] = dev_info.get_global_memory_bus_peak_clock_rate();
|
||||
device["global_memory_bus_width"] = dev_info.get_global_memory_bus_width();
|
||||
device["global_memory_bus_bandwidth"] = dev_info.get_global_memory_bus_bandwidth();
|
||||
device["l2_cache_size"] = dev_info.get_l2_cache_size();
|
||||
device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm();
|
||||
device["shared_memory_per_block"] = dev_info.get_shared_memory_per_block();
|
||||
device["ecc_state"] = dev_info.get_ecc_state();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -298,8 +295,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
false;
|
||||
#endif
|
||||
} // "nvbench"
|
||||
} // "version"
|
||||
} // "meta"
|
||||
} // "version"
|
||||
} // "meta"
|
||||
|
||||
add_devices_section(root);
|
||||
|
||||
@@ -362,8 +359,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
default:
|
||||
break;
|
||||
} // end switch (axis type)
|
||||
} // end foreach axis value
|
||||
} // end foreach axis
|
||||
} // end foreach axis value
|
||||
} // end foreach axis
|
||||
|
||||
auto &states = bench["states"];
|
||||
for (const auto &exec_state : bench_ptr->get_states())
|
||||
@@ -431,8 +428,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
|
||||
continue;
|
||||
}
|
||||
} // end foreach exec_state
|
||||
} // end foreach benchmark
|
||||
} // "benchmarks"
|
||||
} // end foreach benchmark
|
||||
} // "benchmarks"
|
||||
|
||||
m_ostream << root.dump(2) << "\n";
|
||||
}
|
||||
@@ -492,7 +489,7 @@ void json_printer::do_print_benchmark_list(const benchmark_vector &benches)
|
||||
default:
|
||||
break;
|
||||
} // end switch (axis type)
|
||||
} // end foreach axis value
|
||||
} // end foreach axis value
|
||||
}
|
||||
} // end foreach bench
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/printer_base.cuh>
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <string>
|
||||
|
||||
@@ -16,15 +16,13 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/markdown_printer.cuh>
|
||||
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/device_manager.cuh>
|
||||
#include <nvbench/internal/markdown_table.cuh>
|
||||
#include <nvbench/markdown_printer.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
|
||||
#include <nvbench/internal/markdown_table.cuh>
|
||||
|
||||
#include <fmt/color.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
@@ -72,8 +70,12 @@ void markdown_printer::do_print_device_info()
|
||||
"* Max Shared Memory: {} KiB/SM, {} KiB/Block\n",
|
||||
device.get_shared_memory_per_sm() / 1024,
|
||||
device.get_shared_memory_per_block() / 1024);
|
||||
fmt::format_to(std::back_inserter(buffer), "* L2 Cache Size: {} KiB\n", device.get_l2_cache_size() / 1024);
|
||||
fmt::format_to(std::back_inserter(buffer), "* Maximum Active Blocks: {}/SM\n", device.get_max_blocks_per_sm());
|
||||
fmt::format_to(std::back_inserter(buffer),
|
||||
"* L2 Cache Size: {} KiB\n",
|
||||
device.get_l2_cache_size() / 1024);
|
||||
fmt::format_to(std::back_inserter(buffer),
|
||||
"* Maximum Active Blocks: {}/SM\n",
|
||||
device.get_max_blocks_per_sm());
|
||||
fmt::format_to(std::back_inserter(buffer),
|
||||
"* Maximum Active Threads: {}/SM, {}/Block\n",
|
||||
device.get_max_threads_per_sm(),
|
||||
@@ -82,7 +84,9 @@ void markdown_printer::do_print_device_info()
|
||||
"* Available Registers: {}/SM, {}/Block\n",
|
||||
device.get_registers_per_sm(),
|
||||
device.get_registers_per_block());
|
||||
fmt::format_to(std::back_inserter(buffer), "* ECC Enabled: {}\n", device.get_ecc_state() ? "Yes" : "No");
|
||||
fmt::format_to(std::back_inserter(buffer),
|
||||
"* ECC Enabled: {}\n",
|
||||
device.get_ecc_state() ? "Yes" : "No");
|
||||
fmt::format_to(std::back_inserter(buffer), "\n");
|
||||
}
|
||||
m_ostream << fmt::to_string(buffer);
|
||||
@@ -191,9 +195,12 @@ void markdown_printer::do_print_benchmark_list(const printer_base::benchmark_vec
|
||||
{
|
||||
desc = fmt::format(" ({})", desc);
|
||||
}
|
||||
fmt::format_to(std::back_inserter(buffer), " * `{}`{}\n", axis_ptr->get_input_string(i), desc);
|
||||
fmt::format_to(std::back_inserter(buffer),
|
||||
" * `{}`{}\n",
|
||||
axis_ptr->get_input_string(i),
|
||||
desc);
|
||||
} // end foreach value
|
||||
} // end foreach axis
|
||||
} // end foreach axis
|
||||
fmt::format_to(std::back_inserter(buffer), "\n");
|
||||
} // end foreach bench
|
||||
|
||||
|
||||
@@ -16,10 +16,9 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/named_values.cuh>
|
||||
|
||||
#include <nvbench/config.cuh>
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
#include <nvbench/named_values.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
|
||||
@@ -24,8 +24,8 @@
|
||||
#include <nvbench/callable.cuh>
|
||||
#include <nvbench/config.cuh>
|
||||
#include <nvbench/cpu_timer.cuh>
|
||||
#include <nvbench/criterion_manager.cuh>
|
||||
#include <nvbench/create.cuh>
|
||||
#include <nvbench/criterion_manager.cuh>
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/cuda_stream.cuh>
|
||||
#include <nvbench/cuda_timer.cuh>
|
||||
|
||||
@@ -34,6 +34,8 @@
|
||||
#include <nvbench/internal/cli_help.cuh>
|
||||
#include <nvbench/internal/cli_help_axis.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
@@ -44,12 +46,10 @@
|
||||
#include <regex>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <string_view>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
|
||||
@@ -191,9 +191,9 @@ protected:
|
||||
virtual void do_process_bulk_data_float64(nvbench::state &,
|
||||
const std::string &,
|
||||
const std::string &,
|
||||
const std::vector<nvbench::float64_t> &){};
|
||||
const std::vector<nvbench::float64_t> &) {};
|
||||
|
||||
virtual void do_print_benchmark_list(const benchmark_vector &)
|
||||
virtual void do_print_benchmark_list(const benchmark_vector &)
|
||||
{
|
||||
throw std::runtime_error{"nvbench::do_print_benchmark_list is not supported by this printer."};
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
|
||||
#include <nvbench/detail/state_generator.cuh>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
@@ -16,10 +16,9 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/runner.cuh>
|
||||
|
||||
#include <nvbench/benchmark_base.cuh>
|
||||
#include <nvbench/printer_base.cuh>
|
||||
#include <nvbench/runner.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
@@ -20,13 +20,13 @@
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <fmt/color.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include <fmt/color.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
|
||||
@@ -21,19 +21,21 @@
|
||||
#include <nvbench/named_values.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <initializer_list>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
|
||||
namespace detail
|
||||
namespace detail
|
||||
{
|
||||
|
||||
constexpr nvbench::float64_t compat_min_time() { return 0.5; } // 0.5 seconds
|
||||
constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relative standard deviation
|
||||
constexpr nvbench::float64_t compat_min_time() { return 0.5; } // 0.5 seconds
|
||||
constexpr nvbench::float64_t compat_max_noise()
|
||||
{
|
||||
return 0.005;
|
||||
} // 0.5% relative standard deviation
|
||||
|
||||
} // namespace detail
|
||||
|
||||
@@ -43,6 +45,7 @@ constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relati
|
||||
class criterion_params
|
||||
{
|
||||
nvbench::named_values m_named_values;
|
||||
|
||||
public:
|
||||
criterion_params();
|
||||
criterion_params(std::initializer_list<std::pair<std::string, nvbench::named_values::value_type>>);
|
||||
@@ -96,7 +99,7 @@ public:
|
||||
*
|
||||
* This method is called once per benchmark run, before any measurements are provided.
|
||||
*/
|
||||
void initialize(const criterion_params &params)
|
||||
void initialize(const criterion_params &params)
|
||||
{
|
||||
m_params.set_from(params);
|
||||
this->do_initialize();
|
||||
@@ -105,18 +108,12 @@ public:
|
||||
/**
|
||||
* Add the latest measurement to the criterion
|
||||
*/
|
||||
void add_measurement(nvbench::float64_t measurement)
|
||||
{
|
||||
this->do_add_measurement(measurement);
|
||||
}
|
||||
void add_measurement(nvbench::float64_t measurement) { this->do_add_measurement(measurement); }
|
||||
|
||||
/**
|
||||
* Check if the criterion has been met for all measurements processed by `add_measurement`
|
||||
*/
|
||||
bool is_finished()
|
||||
{
|
||||
return this->do_is_finished();
|
||||
}
|
||||
bool is_finished() { return this->do_is_finished(); }
|
||||
|
||||
protected:
|
||||
/**
|
||||
|
||||
@@ -16,10 +16,8 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
|
||||
namespace nvbench
|
||||
{
|
||||
@@ -62,7 +60,7 @@ void criterion_params::set_from(const criterion_params &other)
|
||||
|
||||
void criterion_params::set_int64(std::string name, nvbench::int64_t value)
|
||||
{
|
||||
if (m_named_values.has_value(name))
|
||||
if (m_named_values.has_value(name))
|
||||
{
|
||||
m_named_values.remove_value(name);
|
||||
}
|
||||
@@ -72,7 +70,7 @@ void criterion_params::set_int64(std::string name, nvbench::int64_t value)
|
||||
|
||||
void criterion_params::set_float64(std::string name, nvbench::float64_t value)
|
||||
{
|
||||
if (m_named_values.has_value(name))
|
||||
if (m_named_values.has_value(name))
|
||||
{
|
||||
m_named_values.remove_value(name);
|
||||
}
|
||||
@@ -82,7 +80,7 @@ void criterion_params::set_float64(std::string name, nvbench::float64_t value)
|
||||
|
||||
void criterion_params::set_string(std::string name, std::string value)
|
||||
{
|
||||
if (m_named_values.has_value(name))
|
||||
if (m_named_values.has_value(name))
|
||||
{
|
||||
m_named_values.remove_value(name);
|
||||
}
|
||||
@@ -110,15 +108,11 @@ std::string criterion_params::get_string(const std::string &name) const
|
||||
return m_named_values.get_string(name);
|
||||
}
|
||||
|
||||
std::vector<std::string> criterion_params::get_names() const
|
||||
{
|
||||
return m_named_values.get_names();
|
||||
}
|
||||
std::vector<std::string> criterion_params::get_names() const { return m_named_values.get_names(); }
|
||||
|
||||
nvbench::named_values::type criterion_params::get_type(const std::string &name) const
|
||||
{
|
||||
return m_named_values.get_type(name);
|
||||
}
|
||||
|
||||
|
||||
} // namespace nvbench::detail
|
||||
} // namespace nvbench
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/axis_base.cuh>
|
||||
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include <vector>
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvbench/axis_base.cuh>
|
||||
|
||||
#include <nvbench/type_list.cuh>
|
||||
#include <nvbench/type_strings.cuh>
|
||||
|
||||
|
||||
@@ -16,9 +16,8 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/type_axis.cuh>
|
||||
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
#include <nvbench/type_axis.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <fmt/ranges.h>
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "detail/type_list_impl.cuh"
|
||||
#include <nvbench/detail/type_list_impl.cuh>
|
||||
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
@@ -27,11 +27,11 @@
|
||||
#endif
|
||||
|
||||
#ifdef NVBENCH_CXXABI_DEMANGLE
|
||||
#include <cxxabi.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
|
||||
#include <cxxabi.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
struct free_wrapper
|
||||
|
||||
@@ -5,12 +5,11 @@ import math
|
||||
import os
|
||||
import sys
|
||||
|
||||
from colorama import Fore
|
||||
|
||||
import tabulate
|
||||
|
||||
from colorama import Fore
|
||||
from nvbench_json import reader
|
||||
|
||||
|
||||
# Parse version string into tuple, "x.y.z" -> (x, y, z)
|
||||
def version_tuple(v):
|
||||
return tuple(map(int, (v.split("."))))
|
||||
@@ -139,15 +138,14 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
|
||||
colalign.append("center")
|
||||
|
||||
for device_id in device_ids:
|
||||
|
||||
rows = []
|
||||
plot_data = {'cmp': {}, 'ref': {}, 'cmp_noise': {}, 'ref_noise': {}}
|
||||
plot_data = {"cmp": {}, "ref": {}, "cmp_noise": {}, "ref_noise": {}}
|
||||
|
||||
for cmp_state in cmp_states:
|
||||
cmp_state_name = cmp_state["name"]
|
||||
ref_state = next(filter(lambda st: st["name"] == cmp_state_name,
|
||||
ref_states),
|
||||
None)
|
||||
ref_state = next(
|
||||
filter(lambda st: st["name"] == cmp_state_name, ref_states), None
|
||||
)
|
||||
if not ref_state:
|
||||
continue
|
||||
|
||||
@@ -158,9 +156,7 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
|
||||
row = []
|
||||
for axis_value in axis_values:
|
||||
axis_value_name = axis_value["name"]
|
||||
row.append(format_axis_value(axis_value_name,
|
||||
axis_value,
|
||||
axes))
|
||||
row.append(format_axis_value(axis_value_name, axis_value, axes))
|
||||
|
||||
cmp_summaries = cmp_state["summaries"]
|
||||
ref_summaries = ref_state["summaries"]
|
||||
@@ -171,23 +167,37 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
|
||||
def lookup_summary(summaries, tag):
|
||||
return next(filter(lambda s: s["tag"] == tag, summaries), None)
|
||||
|
||||
cmp_time_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/mean")
|
||||
ref_time_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/mean")
|
||||
cmp_noise_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/stdev/relative")
|
||||
ref_noise_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/stdev/relative")
|
||||
cmp_time_summary = lookup_summary(
|
||||
cmp_summaries, "nv/cold/time/gpu/mean"
|
||||
)
|
||||
ref_time_summary = lookup_summary(
|
||||
ref_summaries, "nv/cold/time/gpu/mean"
|
||||
)
|
||||
cmp_noise_summary = lookup_summary(
|
||||
cmp_summaries, "nv/cold/time/gpu/stdev/relative"
|
||||
)
|
||||
ref_noise_summary = lookup_summary(
|
||||
ref_summaries, "nv/cold/time/gpu/stdev/relative"
|
||||
)
|
||||
|
||||
# TODO: Use other timings, too. Maybe multiple rows, with a
|
||||
# "Timing" column + values "CPU/GPU/Batch"?
|
||||
if not all([cmp_time_summary,
|
||||
ref_time_summary,
|
||||
cmp_noise_summary,
|
||||
ref_noise_summary]):
|
||||
if not all(
|
||||
[
|
||||
cmp_time_summary,
|
||||
ref_time_summary,
|
||||
cmp_noise_summary,
|
||||
ref_noise_summary,
|
||||
]
|
||||
):
|
||||
continue
|
||||
|
||||
def extract_value(summary):
|
||||
summary_data = summary["data"]
|
||||
value_data = next(filter(lambda v: v["name"] == "value", summary_data))
|
||||
assert(value_data["type"] == "float64")
|
||||
value_data = next(
|
||||
filter(lambda v: v["name"] == "value", summary_data)
|
||||
)
|
||||
assert value_data["type"] == "float64"
|
||||
return value_data["value"]
|
||||
|
||||
cmp_time = extract_value(cmp_time_summary)
|
||||
@@ -218,23 +228,27 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
|
||||
if plot:
|
||||
axis_name = []
|
||||
axis_value = "--"
|
||||
for aid in range(len(axis_values)):
|
||||
for aid in range(len(axis_values)):
|
||||
if axis_values[aid]["name"] != plot:
|
||||
axis_name.append("{} = {}".format(axis_values[aid]["name"], axis_values[aid]["value"]))
|
||||
axis_name.append(
|
||||
"{} = {}".format(
|
||||
axis_values[aid]["name"], axis_values[aid]["value"]
|
||||
)
|
||||
)
|
||||
else:
|
||||
axis_value = float(axis_values[aid]["value"])
|
||||
axis_name = ', '.join(axis_name)
|
||||
axis_value = float(axis_values[aid]["value"])
|
||||
axis_name = ", ".join(axis_name)
|
||||
|
||||
if axis_name not in plot_data['cmp']:
|
||||
plot_data['cmp'][axis_name] = {}
|
||||
plot_data['ref'][axis_name] = {}
|
||||
plot_data['cmp_noise'][axis_name] = {}
|
||||
plot_data['ref_noise'][axis_name] = {}
|
||||
if axis_name not in plot_data["cmp"]:
|
||||
plot_data["cmp"][axis_name] = {}
|
||||
plot_data["ref"][axis_name] = {}
|
||||
plot_data["cmp_noise"][axis_name] = {}
|
||||
plot_data["ref_noise"][axis_name] = {}
|
||||
|
||||
plot_data['cmp'][axis_name][axis_value] = cmp_time
|
||||
plot_data['ref'][axis_name][axis_value] = ref_time
|
||||
plot_data['cmp_noise'][axis_name][axis_value] = cmp_noise
|
||||
plot_data['ref_noise'][axis_name][axis_value] = ref_noise
|
||||
plot_data["cmp"][axis_name][axis_value] = cmp_time
|
||||
plot_data["ref"][axis_name][axis_value] = ref_time
|
||||
plot_data["cmp_noise"][axis_name][axis_value] = cmp_noise
|
||||
plot_data["ref_noise"][axis_name][axis_value] = ref_noise
|
||||
|
||||
global config_count
|
||||
global unknown_count
|
||||
@@ -273,14 +287,13 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
|
||||
print("## [%d] %s\n" % (device["id"], device["name"]))
|
||||
# colalign and github format require tabulate 0.8.3
|
||||
if tabulate_version >= (0, 8, 3):
|
||||
print(tabulate.tabulate(rows,
|
||||
headers=headers,
|
||||
colalign=colalign,
|
||||
tablefmt="github"))
|
||||
print(
|
||||
tabulate.tabulate(
|
||||
rows, headers=headers, colalign=colalign, tablefmt="github"
|
||||
)
|
||||
)
|
||||
else:
|
||||
print(tabulate.tabulate(rows,
|
||||
headers=headers,
|
||||
tablefmt="markdown"))
|
||||
print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown"))
|
||||
|
||||
print("")
|
||||
|
||||
@@ -295,18 +308,17 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
|
||||
x = [float(x) for x in plot_data[key][axis].keys()]
|
||||
y = list(plot_data[key][axis].values())
|
||||
|
||||
noise = list(plot_data[key + '_noise'][axis].values())
|
||||
noise = list(plot_data[key + "_noise"][axis].values())
|
||||
|
||||
top = [y[i] + y[i] * noise[i] for i in range(len(x))]
|
||||
bottom = [y[i] - y[i] * noise[i] for i in range(len(x))]
|
||||
|
||||
p = plt.plot(x, y, shape, marker='o', label=label)
|
||||
p = plt.plot(x, y, shape, marker="o", label=label)
|
||||
plt.fill_between(x, bottom, top, color=p[0].get_color(), alpha=0.1)
|
||||
|
||||
|
||||
for axis in plot_data['cmp'].keys():
|
||||
plot_line('cmp', '-', axis)
|
||||
plot_line('ref', '--', axis + ' ref')
|
||||
for axis in plot_data["cmp"].keys():
|
||||
plot_line("cmp", "-", axis)
|
||||
plot_line("ref", "--", axis + " ref")
|
||||
|
||||
plt.legend()
|
||||
plt.show()
|
||||
@@ -314,11 +326,17 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
|
||||
|
||||
def main():
|
||||
help_text = "%(prog)s [reference.json compare.json | reference_dir/ compare_dir/]"
|
||||
parser = argparse.ArgumentParser(prog='nvbench_compare', usage=help_text)
|
||||
parser.add_argument('--threshold-diff', type=float, dest='threshold', default=0.0,
|
||||
help='only show benchmarks where percentage diff is >= THRESHOLD')
|
||||
parser.add_argument('--plot-along', type=str, dest='plot', default=None,
|
||||
help='plot results')
|
||||
parser = argparse.ArgumentParser(prog="nvbench_compare", usage=help_text)
|
||||
parser.add_argument(
|
||||
"--threshold-diff",
|
||||
type=float,
|
||||
dest="threshold",
|
||||
default=0.0,
|
||||
help="only show benchmarks where percentage diff is >= THRESHOLD",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--plot-along", type=str, dest="plot", default=None, help="plot results"
|
||||
)
|
||||
|
||||
args, files_or_dirs = parser.parse_known_args()
|
||||
print(files_or_dirs)
|
||||
@@ -336,14 +354,17 @@ def main():
|
||||
continue
|
||||
r = os.path.join(files_or_dirs[0], f)
|
||||
c = os.path.join(files_or_dirs[1], f)
|
||||
if os.path.isfile(r) and os.path.isfile(c) and \
|
||||
os.path.getsize(r) > 0 and os.path.getsize(c) > 0:
|
||||
if (
|
||||
os.path.isfile(r)
|
||||
and os.path.isfile(c)
|
||||
and os.path.getsize(r) > 0
|
||||
and os.path.getsize(c) > 0
|
||||
):
|
||||
to_compare.append((r, c))
|
||||
else:
|
||||
to_compare = [(files_or_dirs[0], files_or_dirs[1])]
|
||||
|
||||
for ref, comp in to_compare:
|
||||
|
||||
ref_root = reader.read_file(ref)
|
||||
cmp_root = reader.read_file(comp)
|
||||
|
||||
@@ -355,7 +376,9 @@ def main():
|
||||
print("Device sections do not match.")
|
||||
sys.exit(1)
|
||||
|
||||
compare_benches(ref_root["benchmarks"], cmp_root["benchmarks"], args.threshold, args.plot)
|
||||
compare_benches(
|
||||
ref_root["benchmarks"], cmp_root["benchmarks"], args.threshold, args.plot
|
||||
)
|
||||
|
||||
print("# Summary\n")
|
||||
print("- Total Matches: %d" % config_count)
|
||||
@@ -365,5 +388,5 @@ def main():
|
||||
return failure_count
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
from nvbench_json import reader
|
||||
|
||||
|
||||
def parse_files():
|
||||
help_text = "%(prog)s [nvbench.out.json | dir/] ..."
|
||||
parser = argparse.ArgumentParser(prog='nvbench_histogram', usage=help_text)
|
||||
parser = argparse.ArgumentParser(prog="nvbench_histogram", usage=help_text)
|
||||
|
||||
args, files_or_dirs = parser.parse_known_args()
|
||||
|
||||
@@ -41,14 +41,14 @@ def parse_files():
|
||||
def extract_filename(summary):
|
||||
summary_data = summary["data"]
|
||||
value_data = next(filter(lambda v: v["name"] == "filename", summary_data))
|
||||
assert(value_data["type"] == "string")
|
||||
assert value_data["type"] == "string"
|
||||
return value_data["value"]
|
||||
|
||||
|
||||
def extract_size(summary):
|
||||
summary_data = summary["data"]
|
||||
value_data = next(filter(lambda v: v["name"] == "size", summary_data))
|
||||
assert(value_data["type"] == "int64")
|
||||
assert value_data["type"] == "int64"
|
||||
return int(value_data["value"])
|
||||
|
||||
|
||||
@@ -57,9 +57,10 @@ def parse_samples_meta(filename, state):
|
||||
if not summaries:
|
||||
return None, None
|
||||
|
||||
summary = next(filter(lambda s: s["tag"] == "nv/json/bin:nv/cold/sample_times",
|
||||
summaries),
|
||||
None)
|
||||
summary = next(
|
||||
filter(lambda s: s["tag"] == "nv/json/bin:nv/cold/sample_times", summaries),
|
||||
None,
|
||||
)
|
||||
if not summary:
|
||||
return None, None
|
||||
|
||||
@@ -81,7 +82,7 @@ def parse_samples(filename, state):
|
||||
with open(samples_filename, "rb") as f:
|
||||
samples = np.fromfile(f, "<f4")
|
||||
|
||||
assert (sample_count == len(samples))
|
||||
assert sample_count == len(samples)
|
||||
return samples
|
||||
|
||||
|
||||
@@ -118,5 +119,5 @@ def main():
|
||||
plt.show()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
from . import reader
|
||||
from . import version
|
||||
from . import reader, version
|
||||
|
||||
__all__ = ["reader", "version"]
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
file_version = (1, 0, 0)
|
||||
|
||||
file_version_string = "{}.{}.{}".format(file_version[0],
|
||||
file_version[1],
|
||||
file_version[2])
|
||||
file_version_string = "{}.{}.{}".format(
|
||||
file_version[0], file_version[1], file_version[2]
|
||||
)
|
||||
|
||||
|
||||
def check_file_version(filename, root_node):
|
||||
@@ -19,8 +19,14 @@ def check_file_version(filename, root_node):
|
||||
# for now just warn on mismatch.
|
||||
if version_node["string"] != file_version_string:
|
||||
print("WARNING:")
|
||||
print(" {} was written using a different NVBench JSON file version."
|
||||
.format(filename))
|
||||
print(
|
||||
" {} was written using a different NVBench JSON file version.".format(
|
||||
filename
|
||||
)
|
||||
)
|
||||
print(" It may not read correctly.")
|
||||
print(" (file version: {} reader version: {})"
|
||||
.format(version_node["string"], file_version_string))
|
||||
print(
|
||||
" (file version: {} reader version: {})".format(
|
||||
version_node["string"], file_version_string
|
||||
)
|
||||
)
|
||||
|
||||
@@ -5,9 +5,8 @@ import math
|
||||
import os
|
||||
import sys
|
||||
|
||||
from nvbench_json import reader
|
||||
|
||||
import tabulate
|
||||
from nvbench_json import reader
|
||||
|
||||
|
||||
# Parse version string into tuple, "x.y.z" -> (x, y, z)
|
||||
@@ -39,7 +38,8 @@ def format_walltime(seconds_in):
|
||||
"{:0>2d}:".format(h) if h > 1e-9 else "",
|
||||
"{:0>2d}:".format(m) if (h > 1e-9 or m > 1e-9) else "",
|
||||
"{:0>2d}.".format(s) if (h > 1e-9 or m > 1e-9) else "{:d}.".format(s),
|
||||
"{:0>3d}".format(ms))
|
||||
"{:0>3d}".format(ms),
|
||||
)
|
||||
|
||||
|
||||
def format_percentage(percentage):
|
||||
@@ -58,7 +58,7 @@ measure_column_names = {"cold": "Isolated", "batch": "Batch", "cupti": "CUPTI"}
|
||||
def init_measures():
|
||||
out = {}
|
||||
for name in measure_names:
|
||||
out[name] = 0.
|
||||
out[name] = 0.0
|
||||
return out
|
||||
|
||||
|
||||
@@ -67,17 +67,17 @@ def get_measures(state):
|
||||
times = {}
|
||||
for name in measure_names:
|
||||
measure_walltime_tag = "nv/{}/walltime".format(name)
|
||||
summary = next(filter(lambda s: s["tag"] == measure_walltime_tag,
|
||||
summaries),
|
||||
None)
|
||||
summary = next(
|
||||
filter(lambda s: s["tag"] == measure_walltime_tag, summaries), None
|
||||
)
|
||||
if not summary:
|
||||
continue
|
||||
|
||||
walltime_data = next(filter(lambda d: d["name"] == "value", summary["data"]))
|
||||
assert(walltime_data["type"] == "float64")
|
||||
assert walltime_data["type"] == "float64"
|
||||
walltime = walltime_data["value"]
|
||||
walltime = float(walltime)
|
||||
times[name] = walltime if walltime else 0.
|
||||
times[name] = walltime if walltime else 0.0
|
||||
return times
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ def merge_measures(target, src):
|
||||
|
||||
|
||||
def sum_measures(measures):
|
||||
total_time = 0.
|
||||
total_time = 0.0
|
||||
for time in measures.values():
|
||||
total_time += time
|
||||
return total_time
|
||||
@@ -194,20 +194,21 @@ def print_overview_section(data):
|
||||
|
||||
# colalign and github format require tabulate 0.8.3
|
||||
if tabulate_version >= (0, 8, 3):
|
||||
print(tabulate.tabulate(rows,
|
||||
headers=headers,
|
||||
colalign=colalign,
|
||||
tablefmt="github"))
|
||||
print(
|
||||
tabulate.tabulate(
|
||||
rows, headers=headers, colalign=colalign, tablefmt="github"
|
||||
)
|
||||
)
|
||||
else:
|
||||
print(tabulate.tabulate(rows,
|
||||
headers=headers,
|
||||
tablefmt="markdown"))
|
||||
print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown"))
|
||||
|
||||
print()
|
||||
|
||||
|
||||
# append_data_row_lambda args: (row_list, name, items[name])
|
||||
def print_measures_table(headers, colalign, items, total_measures, append_item_row_lambda):
|
||||
def print_measures_table(
|
||||
headers, colalign, items, total_measures, append_item_row_lambda
|
||||
):
|
||||
total_time = sum_measures(total_measures)
|
||||
active_measures = get_active_measure_names(total_measures)
|
||||
num_user_columns = len(headers)
|
||||
@@ -248,14 +249,13 @@ def print_measures_table(headers, colalign, items, total_measures, append_item_r
|
||||
|
||||
# colalign and github format require tabulate 0.8.3
|
||||
if tabulate_version >= (0, 8, 3):
|
||||
print(tabulate.tabulate(rows,
|
||||
headers=headers,
|
||||
colalign=colalign,
|
||||
tablefmt="github"))
|
||||
print(
|
||||
tabulate.tabulate(
|
||||
rows, headers=headers, colalign=colalign, tablefmt="github"
|
||||
)
|
||||
)
|
||||
else:
|
||||
print(tabulate.tabulate(rows,
|
||||
headers=headers,
|
||||
tablefmt="markdown"))
|
||||
print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown"))
|
||||
|
||||
|
||||
def print_files_section(data):
|
||||
@@ -319,7 +319,7 @@ def print_bench_section(bench_name, bench):
|
||||
|
||||
def main():
|
||||
help_text = "%(prog)s [nvbench.out.json | dir/]..."
|
||||
parser = argparse.ArgumentParser(prog='nvbench_walltime', usage=help_text)
|
||||
parser = argparse.ArgumentParser(prog="nvbench_walltime", usage=help_text)
|
||||
|
||||
args, files_or_dirs = parser.parse_known_args()
|
||||
|
||||
@@ -353,5 +353,5 @@ def main():
|
||||
print_files_section(data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -17,22 +17,19 @@
|
||||
*/
|
||||
|
||||
#include <nvbench/axes_metadata.cuh>
|
||||
|
||||
#include <nvbench/type_list.cuh>
|
||||
#include <nvbench/type_strings.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <string_view>
|
||||
|
||||
using int_list = nvbench::type_list<nvbench::int8_t,
|
||||
nvbench::int16_t,
|
||||
nvbench::int32_t,
|
||||
nvbench::int64_t>;
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
using int_list =
|
||||
nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;
|
||||
|
||||
using float_list = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>;
|
||||
|
||||
@@ -110,7 +107,6 @@ void test_default_type_axes_names()
|
||||
ASSERT(axes.get_type_axis(4).get_name() == "T4");
|
||||
ASSERT(axes.get_type_axis(4).get_axis_index() == 4);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void test_type_axes()
|
||||
@@ -138,8 +134,7 @@ void test_type_axes()
|
||||
fmt::format_to(std::back_inserter(buffer),
|
||||
" - {}{}\n",
|
||||
input_string,
|
||||
description.empty() ? ""
|
||||
: fmt::format(" ({})", description));
|
||||
description.empty() ? "" : fmt::format(" ({})", description));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,9 +152,8 @@ Axis: Other
|
||||
)expected";
|
||||
|
||||
const std::string test = fmt::to_string(buffer);
|
||||
const auto diff =
|
||||
std::mismatch(ref.cbegin(), ref.cend(), test.cbegin(), test.cend());
|
||||
const auto idx = static_cast<std::size_t>(diff.second - test.cbegin());
|
||||
const auto diff = std::mismatch(ref.cbegin(), ref.cend(), test.cbegin(), test.cend());
|
||||
const auto idx = static_cast<std::size_t>(diff.second - test.cbegin());
|
||||
ASSERT_MSG(test == ref,
|
||||
"Differs at character {}.\n"
|
||||
"Expected:\n\"{}\"\n\n"
|
||||
@@ -189,9 +183,7 @@ void test_float64_axes()
|
||||
void test_int64_axes()
|
||||
{
|
||||
nvbench::axes_metadata axes;
|
||||
axes.add_int64_axis("I64 Axis",
|
||||
{10, 11, 12, 13, 14},
|
||||
nvbench::int64_axis_flags::none);
|
||||
axes.add_int64_axis("I64 Axis", {10, 11, 12, 13, 14}, nvbench::int64_axis_flags::none);
|
||||
ASSERT(axes.get_axes().size() == 1);
|
||||
const auto &axis = axes.get_int64_axis("I64 Axis");
|
||||
ASSERT(axis.get_size() == 5);
|
||||
@@ -205,9 +197,7 @@ void test_int64_axes()
|
||||
void test_int64_power_of_two_axes()
|
||||
{
|
||||
nvbench::axes_metadata axes;
|
||||
axes.add_int64_axis("I64 POT Axis",
|
||||
{1, 2, 3, 4, 5},
|
||||
nvbench::int64_axis_flags::power_of_two);
|
||||
axes.add_int64_axis("I64 POT Axis", {1, 2, 3, 4, 5}, nvbench::int64_axis_flags::power_of_two);
|
||||
ASSERT(axes.get_axes().size() == 1);
|
||||
const auto &axis = axes.get_int64_axis("I64 POT Axis");
|
||||
ASSERT(axis.get_size() == 5);
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
*/
|
||||
|
||||
#include <nvbench/benchmark.cuh>
|
||||
|
||||
#include <nvbench/callable.cuh>
|
||||
#include <nvbench/named_values.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
@@ -25,8 +24,6 @@
|
||||
#include <nvbench/type_strings.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <algorithm>
|
||||
@@ -34,6 +31,8 @@
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> sort(std::vector<T> &&vec)
|
||||
{
|
||||
@@ -61,34 +60,26 @@ void no_op_generator(nvbench::state &state)
|
||||
NVBENCH_DEFINE_CALLABLE(no_op_generator, no_op_callable);
|
||||
|
||||
template <typename Integer, typename Float, typename Other>
|
||||
void template_no_op_generator(nvbench::state &state,
|
||||
nvbench::type_list<Integer, Float, Other>)
|
||||
void template_no_op_generator(nvbench::state &state, nvbench::type_list<Integer, Float, Other>)
|
||||
{
|
||||
ASSERT(nvbench::type_strings<Integer>::input_string() ==
|
||||
state.get_string("Integer"));
|
||||
ASSERT(nvbench::type_strings<Float>::input_string() ==
|
||||
state.get_string("Float"));
|
||||
ASSERT(nvbench::type_strings<Other>::input_string() ==
|
||||
state.get_string("Other"));
|
||||
ASSERT(nvbench::type_strings<Integer>::input_string() == state.get_string("Integer"));
|
||||
ASSERT(nvbench::type_strings<Float>::input_string() == state.get_string("Float"));
|
||||
ASSERT(nvbench::type_strings<Other>::input_string() == state.get_string("Other"));
|
||||
|
||||
// Enum params using non-templated version:
|
||||
no_op_generator(state);
|
||||
}
|
||||
NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator,
|
||||
template_no_op_callable);
|
||||
NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, template_no_op_callable);
|
||||
|
||||
using int_list = nvbench::type_list<nvbench::int8_t,
|
||||
nvbench::int16_t,
|
||||
nvbench::int32_t,
|
||||
nvbench::int64_t>;
|
||||
using int_list =
|
||||
nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;
|
||||
|
||||
using float_list = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>;
|
||||
|
||||
using misc_list = nvbench::type_list<bool, void>;
|
||||
|
||||
using lots_of_types_bench =
|
||||
nvbench::benchmark<template_no_op_callable,
|
||||
nvbench::type_list<int_list, float_list, misc_list>>;
|
||||
nvbench::benchmark<template_no_op_callable, nvbench::type_list<int_list, float_list, misc_list>>;
|
||||
|
||||
using no_types_bench = nvbench::benchmark<no_op_callable>;
|
||||
|
||||
@@ -110,8 +101,7 @@ void test_type_axes()
|
||||
fmt::format_to(std::back_inserter(buffer),
|
||||
" - {}{}\n",
|
||||
input_string,
|
||||
description.empty() ? ""
|
||||
: fmt::format(" ({})", description));
|
||||
description.empty() ? "" : fmt::format(" ({})", description));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -300,9 +290,7 @@ void test_get_config_count()
|
||||
|
||||
auto const num_devices = bench.get_devices().size();
|
||||
|
||||
ASSERT_MSG(bench.get_config_count() == 72 * num_devices,
|
||||
"Got {}",
|
||||
bench.get_config_count());
|
||||
ASSERT_MSG(bench.get_config_count() == 72 * num_devices, "Got {}", bench.get_config_count());
|
||||
}
|
||||
|
||||
int main()
|
||||
|
||||
@@ -18,11 +18,11 @@
|
||||
|
||||
#include <nvbench/cpu_timer.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
void test_basic()
|
||||
{
|
||||
using namespace std::literals::chrono_literals;
|
||||
|
||||
@@ -16,17 +16,14 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/create.cuh>
|
||||
|
||||
#include <nvbench/benchmark.cuh>
|
||||
#include <nvbench/callable.cuh>
|
||||
#include <nvbench/create.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/type_list.cuh>
|
||||
#include <nvbench/type_strings.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <algorithm>
|
||||
@@ -34,6 +31,8 @@
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> sort(std::vector<T> &&vec)
|
||||
{
|
||||
@@ -72,15 +71,11 @@ using misc_types = nvbench::type_list<bool, void>;
|
||||
using type_axes = nvbench::type_list<float_types, int_types, misc_types>;
|
||||
|
||||
template <typename FloatT, typename IntT, typename MiscT>
|
||||
void template_no_op_generator(nvbench::state &state,
|
||||
nvbench::type_list<FloatT, IntT, MiscT>)
|
||||
void template_no_op_generator(nvbench::state &state, nvbench::type_list<FloatT, IntT, MiscT>)
|
||||
{
|
||||
ASSERT(nvbench::type_strings<FloatT>::input_string() ==
|
||||
state.get_string("FloatT"));
|
||||
ASSERT(nvbench::type_strings<IntT>::input_string() ==
|
||||
state.get_string("IntT"));
|
||||
ASSERT(nvbench::type_strings<IntT>::input_string() ==
|
||||
state.get_string("IntT"));
|
||||
ASSERT(nvbench::type_strings<FloatT>::input_string() == state.get_string("FloatT"));
|
||||
ASSERT(nvbench::type_strings<IntT>::input_string() == state.get_string("IntT"));
|
||||
ASSERT(nvbench::type_strings<IntT>::input_string() == state.get_string("IntT"));
|
||||
|
||||
// Enum params using non-templated version:
|
||||
no_op_generator(state);
|
||||
@@ -116,8 +111,7 @@ std::string run_and_get_state_string(nvbench::benchmark_base &bench,
|
||||
|
||||
void validate_default_name()
|
||||
{
|
||||
auto bench =
|
||||
nvbench::benchmark_manager::get().get_benchmark("no_op_generator").clone();
|
||||
auto bench = nvbench::benchmark_manager::get().get_benchmark("no_op_generator").clone();
|
||||
|
||||
const std::string ref = "Params:\n";
|
||||
|
||||
@@ -127,8 +121,7 @@ void validate_default_name()
|
||||
|
||||
void validate_custom_name()
|
||||
{
|
||||
auto bench =
|
||||
nvbench::benchmark_manager::get().get_benchmark("Custom Name").clone();
|
||||
auto bench = nvbench::benchmark_manager::get().get_benchmark("Custom Name").clone();
|
||||
|
||||
const std::string ref = "Params:\n";
|
||||
|
||||
@@ -138,8 +131,7 @@ void validate_custom_name()
|
||||
|
||||
void validate_no_types()
|
||||
{
|
||||
auto bench =
|
||||
nvbench::benchmark_manager::get().get_benchmark("No Types").clone();
|
||||
auto bench = nvbench::benchmark_manager::get().get_benchmark("No Types").clone();
|
||||
|
||||
const std::string ref = R"expected(Params: Float: 11 Int: 1 String: One
|
||||
Params: Float: 11 Int: 2 String: One
|
||||
@@ -176,8 +168,7 @@ Params: Float: 13 Int: 3 String: Three
|
||||
|
||||
void validate_only_types()
|
||||
{
|
||||
auto bench =
|
||||
nvbench::benchmark_manager::get().get_benchmark("Oops, All Types!").clone();
|
||||
auto bench = nvbench::benchmark_manager::get().get_benchmark("Oops, All Types!").clone();
|
||||
|
||||
const std::string ref = R"expected(Params: FloatT: F32 IntT: I32 MiscT: bool
|
||||
Params: FloatT: F32 IntT: I32 MiscT: void
|
||||
@@ -195,8 +186,7 @@ Params: FloatT: F64 IntT: I64 MiscT: void
|
||||
|
||||
void validate_all_axes()
|
||||
{
|
||||
auto bench =
|
||||
nvbench::benchmark_manager::get().get_benchmark("All The Axes").clone();
|
||||
auto bench = nvbench::benchmark_manager::get().get_benchmark("All The Axes").clone();
|
||||
|
||||
const std::string ref =
|
||||
R"expected(Params: Float: 11 FloatT: F32 Int: 1 IntT: I32 MiscT: bool String: One
|
||||
|
||||
@@ -42,27 +42,34 @@ protected:
|
||||
|
||||
void test_no_duplicates_are_allowed()
|
||||
{
|
||||
nvbench::criterion_manager& manager = nvbench::criterion_manager::get();
|
||||
bool exception_triggered = false;
|
||||
nvbench::criterion_manager &manager = nvbench::criterion_manager::get();
|
||||
bool exception_triggered = false;
|
||||
|
||||
try {
|
||||
[[maybe_unused]] nvbench::stopping_criterion_base& _ = manager.get_criterion("custom");
|
||||
} catch(...) {
|
||||
try
|
||||
{
|
||||
[[maybe_unused]] nvbench::stopping_criterion_base &_ = manager.get_criterion("custom");
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
exception_triggered = true;
|
||||
}
|
||||
ASSERT(exception_triggered);
|
||||
|
||||
std::unique_ptr<custom_criterion> custom_ptr = std::make_unique<custom_criterion>();
|
||||
custom_criterion* custom_raw = custom_ptr.get();
|
||||
custom_criterion *custom_raw = custom_ptr.get();
|
||||
ASSERT(&manager.add(std::move(custom_ptr)) == custom_raw);
|
||||
|
||||
nvbench::stopping_criterion_base& custom = nvbench::criterion_manager::get().get_criterion("custom");
|
||||
nvbench::stopping_criterion_base &custom =
|
||||
nvbench::criterion_manager::get().get_criterion("custom");
|
||||
ASSERT(custom_raw == &custom);
|
||||
|
||||
exception_triggered = false;
|
||||
try {
|
||||
try
|
||||
{
|
||||
manager.add(std::make_unique<custom_criterion>());
|
||||
} catch(...) {
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
exception_triggered = true;
|
||||
}
|
||||
ASSERT(exception_triggered);
|
||||
|
||||
@@ -60,4 +60,3 @@ int main()
|
||||
test_compat_overwrite();
|
||||
test_overwrite();
|
||||
}
|
||||
|
||||
|
||||
@@ -16,19 +16,16 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/cuda_timer.cuh>
|
||||
|
||||
#include <nvbench/cuda_stream.cuh>
|
||||
#include <nvbench/cuda_timer.cuh>
|
||||
#include <nvbench/test_kernels.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
void test_basic(cudaStream_t time_stream,
|
||||
cudaStream_t exec_stream,
|
||||
bool expected)
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
void test_basic(cudaStream_t time_stream, cudaStream_t exec_stream, bool expected)
|
||||
{
|
||||
nvbench::cuda_timer timer;
|
||||
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/nvbench.cuh>
|
||||
#include "nvbench/cuda_call.cuh"
|
||||
|
||||
/******************************************************************************
|
||||
* Install custom parser.
|
||||
@@ -35,7 +35,7 @@
|
||||
// User code to handle a specific argument:
|
||||
void handle_my_custom_arg();
|
||||
|
||||
// NVBench hook for modiifying the command line arguments before parsing:
|
||||
// NVBench hook for modifying the command line arguments before parsing:
|
||||
void custom_arg_handler(std::vector<std::string> &args)
|
||||
{
|
||||
// Handle and remove "--my-custom-arg"
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
#include <nvbench/nvbench.cuh>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
/******************************************************************************
|
||||
* Test having global state that is initialized and finalized via RAII.
|
||||
|
||||
@@ -29,12 +29,10 @@
|
||||
void noisy_bench(nvbench::state &state)
|
||||
{
|
||||
// time, convert ms -> s
|
||||
const auto mean = static_cast<nvbench::float32_t>(state.get_float64("Mean")) /
|
||||
1000.f;
|
||||
const auto mean = static_cast<nvbench::float32_t>(state.get_float64("Mean")) / 1000.f;
|
||||
// rel stdev
|
||||
const auto noise_pct =
|
||||
static_cast<nvbench::float32_t>(state.get_float64("Noise"));
|
||||
const auto noise = noise_pct / 100.f;
|
||||
const auto noise_pct = static_cast<nvbench::float32_t>(state.get_float64("Noise"));
|
||||
const auto noise = noise_pct / 100.f;
|
||||
// abs stdev
|
||||
const auto stdev = noise * mean;
|
||||
|
||||
@@ -53,8 +51,7 @@ void noisy_bench(nvbench::state &state)
|
||||
try
|
||||
{
|
||||
return static_cast<nvbench::float32_t>(
|
||||
state.get_summary("nv/cold/time/gpu/stdev/relative")
|
||||
.get_float64("value"));
|
||||
state.get_summary("nv/cold/time/gpu/stdev/relative").get_float64("value"));
|
||||
}
|
||||
catch (std::invalid_argument &)
|
||||
{
|
||||
|
||||
@@ -20,11 +20,11 @@
|
||||
#include <nvbench/stopping_criterion.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <numeric>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
void test_const()
|
||||
{
|
||||
@@ -32,7 +32,7 @@ void test_const()
|
||||
nvbench::detail::entropy_criterion criterion;
|
||||
|
||||
criterion.initialize(params);
|
||||
for (int i = 0; i < 6; i++)
|
||||
for (int i = 0; i < 6; i++)
|
||||
{ // nvbench wants at least 5 to compute the standard deviation
|
||||
criterion.add_measurement(42.0);
|
||||
}
|
||||
@@ -48,7 +48,7 @@ void produce_entropy_arch(nvbench::detail::entropy_criterion &criterion)
|
||||
* 2.5, 2.4, 2.2, 2.1, 2.0, 1.9 <-+
|
||||
* 1.8, 1.7, 1.6, 1.6, 1.5, 1.4 |
|
||||
* 1.4, 1.3, 1.3, 1.3, 1.2, 1.2 |
|
||||
* 1.1, 1.1, 1.1, 1.0, 1.0, 1.0 +-- entropy only decreases after 5-th sample,
|
||||
* 1.1, 1.1, 1.1, 1.0, 1.0, 1.0 +-- entropy only decreases after 5-th sample,
|
||||
* 1.0, 0.9, 0.9, 0.9, 0.9, 0.9 | so the slope should be negative
|
||||
* 0.8, 0.8, 0.8, 0.8, 0.8, 0.8 |
|
||||
* 0.7, 0.7, 0.7, 0.7, 0.7, 0.7 <-+
|
||||
|
||||
@@ -18,12 +18,12 @@
|
||||
|
||||
#include <nvbench/enum_type_list.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
// If using gcc version 7, disable some tests to WAR a compiler bug. See NVIDIA/nvbench#39.
|
||||
#if defined(__GNUC__) && __GNUC__ == 7
|
||||
#define USING_GCC_7
|
||||
@@ -102,8 +102,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
|
||||
void test_int()
|
||||
{
|
||||
ASSERT((std::is_same_v<nvbench::enum_type_list<>, nvbench::type_list<>>));
|
||||
ASSERT((std::is_same_v<nvbench::enum_type_list<0>,
|
||||
nvbench::type_list<nvbench::enum_type<0>>>));
|
||||
ASSERT((std::is_same_v<nvbench::enum_type_list<0>, nvbench::type_list<nvbench::enum_type<0>>>));
|
||||
ASSERT((std::is_same_v<nvbench::enum_type_list<0, 1, 2, 3, 4>,
|
||||
nvbench::type_list<nvbench::enum_type<0>,
|
||||
nvbench::enum_type<1>,
|
||||
@@ -115,42 +114,35 @@ void test_int()
|
||||
void test_scoped_enum()
|
||||
{
|
||||
#ifndef USING_GCC_7
|
||||
ASSERT((
|
||||
std::is_same_v<nvbench::enum_type_list<scoped_enum::val_1>,
|
||||
nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>>>));
|
||||
ASSERT((std::is_same_v<nvbench::enum_type_list<scoped_enum::val_1>,
|
||||
nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>>>));
|
||||
#endif
|
||||
ASSERT((
|
||||
std::is_same_v<nvbench::enum_type_list<scoped_enum::val_1,
|
||||
scoped_enum::val_2,
|
||||
scoped_enum::val_3>,
|
||||
nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>,
|
||||
nvbench::enum_type<scoped_enum::val_2>,
|
||||
nvbench::enum_type<scoped_enum::val_3>>>));
|
||||
ASSERT((std::is_same_v<
|
||||
nvbench::enum_type_list<scoped_enum::val_1, scoped_enum::val_2, scoped_enum::val_3>,
|
||||
nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>,
|
||||
nvbench::enum_type<scoped_enum::val_2>,
|
||||
nvbench::enum_type<scoped_enum::val_3>>>));
|
||||
}
|
||||
|
||||
void test_unscoped_enum()
|
||||
{
|
||||
#ifndef USING_GCC_7
|
||||
ASSERT(
|
||||
(std::is_same_v<nvbench::enum_type_list<unscoped_val_1>,
|
||||
nvbench::type_list<nvbench::enum_type<unscoped_val_1>>>));
|
||||
ASSERT(
|
||||
(std::is_same_v<
|
||||
nvbench::enum_type_list<unscoped_val_1, unscoped_val_2, unscoped_val_3>,
|
||||
nvbench::type_list<nvbench::enum_type<unscoped_val_1>,
|
||||
nvbench::enum_type<unscoped_val_2>,
|
||||
nvbench::enum_type<unscoped_val_3>>>));
|
||||
ASSERT((std::is_same_v<nvbench::enum_type_list<unscoped_val_1>,
|
||||
nvbench::type_list<nvbench::enum_type<unscoped_val_1>>>));
|
||||
ASSERT((std::is_same_v<nvbench::enum_type_list<unscoped_val_1, unscoped_val_2, unscoped_val_3>,
|
||||
nvbench::type_list<nvbench::enum_type<unscoped_val_1>,
|
||||
nvbench::enum_type<unscoped_val_2>,
|
||||
nvbench::enum_type<unscoped_val_3>>>));
|
||||
#endif
|
||||
}
|
||||
|
||||
void test_scoped_enum_type_strings()
|
||||
{
|
||||
using values = nvbench::enum_type_list<scoped_enum::val_1,
|
||||
scoped_enum::val_2,
|
||||
scoped_enum::val_3>;
|
||||
using val_1 = nvbench::tl::get<0, values>;
|
||||
using val_2 = nvbench::tl::get<1, values>;
|
||||
using val_3 = nvbench::tl::get<2, values>;
|
||||
using values =
|
||||
nvbench::enum_type_list<scoped_enum::val_1, scoped_enum::val_2, scoped_enum::val_3>;
|
||||
using val_1 = nvbench::tl::get<0, values>;
|
||||
using val_2 = nvbench::tl::get<1, values>;
|
||||
using val_3 = nvbench::tl::get<2, values>;
|
||||
ASSERT((nvbench::type_strings<val_1>::input_string() == "1"));
|
||||
ASSERT((nvbench::type_strings<val_1>::description() == "scoped_enum::val_1"));
|
||||
ASSERT((nvbench::type_strings<val_2>::input_string() == "2"));
|
||||
|
||||
@@ -34,8 +34,7 @@ void test_empty()
|
||||
|
||||
const auto clone_base = axis.clone();
|
||||
ASSERT(clone_base.get() != nullptr);
|
||||
const auto *clone =
|
||||
dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
|
||||
const auto *clone = dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
|
||||
ASSERT(clone != nullptr);
|
||||
|
||||
ASSERT(clone->get_name() == "Empty");
|
||||
@@ -62,8 +61,7 @@ void test_basic()
|
||||
|
||||
const auto clone_base = axis.clone();
|
||||
ASSERT(clone_base.get() != nullptr);
|
||||
const auto *clone =
|
||||
dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
|
||||
const auto *clone = dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
|
||||
ASSERT(clone != nullptr);
|
||||
|
||||
ASSERT(clone->get_name() == "Basic");
|
||||
|
||||
@@ -18,10 +18,10 @@
|
||||
|
||||
#include <nvbench/int64_axis.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
void test_empty()
|
||||
{
|
||||
nvbench::int64_axis axis("Empty");
|
||||
@@ -36,8 +36,7 @@ void test_empty()
|
||||
|
||||
const auto clone_base = axis.clone();
|
||||
ASSERT(clone_base.get() != nullptr);
|
||||
const auto *clone =
|
||||
dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
|
||||
const auto *clone = dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
|
||||
ASSERT(clone != nullptr);
|
||||
|
||||
ASSERT(clone->get_name() == "Empty");
|
||||
@@ -66,8 +65,7 @@ void test_basic()
|
||||
|
||||
const auto clone_base = axis.clone();
|
||||
ASSERT(clone_base.get() != nullptr);
|
||||
const auto *clone =
|
||||
dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
|
||||
const auto *clone = dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
|
||||
ASSERT(clone != nullptr);
|
||||
|
||||
ASSERT(clone->get_name() == "BasicAxis");
|
||||
@@ -87,8 +85,7 @@ void test_basic()
|
||||
void test_power_of_two()
|
||||
{
|
||||
nvbench::int64_axis axis{"POTAxis"};
|
||||
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4},
|
||||
nvbench::int64_axis_flags::power_of_two);
|
||||
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two);
|
||||
const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4};
|
||||
const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16};
|
||||
|
||||
@@ -102,14 +99,12 @@ void test_power_of_two()
|
||||
for (size_t i = 0; i < 8; ++i)
|
||||
{
|
||||
ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i]));
|
||||
ASSERT(axis.get_description(i) ==
|
||||
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
|
||||
ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
|
||||
}
|
||||
|
||||
const auto clone_base = axis.clone();
|
||||
ASSERT(clone_base.get() != nullptr);
|
||||
const auto *clone =
|
||||
dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
|
||||
const auto *clone = dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
|
||||
ASSERT(clone != nullptr);
|
||||
|
||||
ASSERT(clone->get_name() == "POTAxis");
|
||||
@@ -122,8 +117,7 @@ void test_power_of_two()
|
||||
for (size_t i = 0; i < 8; ++i)
|
||||
{
|
||||
ASSERT(clone->get_input_string(i) == fmt::to_string(ref_inputs[i]));
|
||||
ASSERT(clone->get_description(i) ==
|
||||
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
|
||||
ASSERT(clone->get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -250,8 +244,7 @@ void test_update_none_to_pow2()
|
||||
void test_update_pow2_to_none()
|
||||
{
|
||||
nvbench::int64_axis axis{"TestAxis"};
|
||||
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4},
|
||||
nvbench::int64_axis_flags::power_of_two);
|
||||
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two);
|
||||
const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4};
|
||||
const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16};
|
||||
|
||||
@@ -304,8 +297,7 @@ void test_update_pow2_to_none()
|
||||
for (size_t i = 0; i < 8; ++i)
|
||||
{
|
||||
ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i]));
|
||||
ASSERT(axis.get_description(i) ==
|
||||
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
|
||||
ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -313,8 +305,7 @@ void test_update_pow2_to_pow2()
|
||||
{
|
||||
|
||||
nvbench::int64_axis axis{"TestAxis"};
|
||||
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4},
|
||||
nvbench::int64_axis_flags::power_of_two);
|
||||
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two);
|
||||
const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4};
|
||||
const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16};
|
||||
|
||||
@@ -369,8 +360,7 @@ void test_update_pow2_to_pow2()
|
||||
for (size_t i = 0; i < 8; ++i)
|
||||
{
|
||||
ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i]));
|
||||
ASSERT(axis.get_description(i) ==
|
||||
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
|
||||
ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,10 +18,10 @@
|
||||
|
||||
#include <nvbench/named_values.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
void test_empty()
|
||||
{
|
||||
nvbench::named_values vals;
|
||||
|
||||
@@ -16,15 +16,14 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/option_parser.cuh>
|
||||
|
||||
#include <nvbench/create.cuh>
|
||||
#include <nvbench/option_parser.cuh>
|
||||
#include <nvbench/type_list.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
//==============================================================================
|
||||
// Declare a couple benchmarks for testing:
|
||||
void DummyBench(nvbench::state &state) { state.skip("Skipping for testing."); }
|
||||
@@ -50,8 +49,7 @@ NVBENCH_BENCH_TYPES(TestBench, NVBENCH_TYPE_AXES(Ts, Us))
|
||||
namespace
|
||||
{
|
||||
|
||||
[[nodiscard]] std::string
|
||||
states_to_string(const std::vector<nvbench::state> &states)
|
||||
[[nodiscard]] std::string states_to_string(const std::vector<nvbench::state> &states)
|
||||
{
|
||||
fmt::memory_buffer buffer;
|
||||
std::string table_format = "| {:^5} | {:^10} | {:^4} | {:^4} | {:^4} "
|
||||
@@ -88,7 +86,7 @@ states_to_string(const std::vector<nvbench::state> &states)
|
||||
|
||||
// Expects the parser to have a single TestBench benchmark. Runs the benchmark
|
||||
// and returns the resulting states.
|
||||
[[nodiscard]] const auto& parser_to_states(nvbench::option_parser &parser)
|
||||
[[nodiscard]] const auto &parser_to_states(nvbench::option_parser &parser)
|
||||
{
|
||||
const auto &benches = parser.get_benchmarks();
|
||||
ASSERT(benches.size() == 1);
|
||||
@@ -267,8 +265,7 @@ void test_int64_axis_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 2 : 1 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 2 : 1 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -308,8 +305,7 @@ void test_int64_axis_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 , 7 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 , 7 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -323,8 +319,7 @@ void test_int64_axis_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 7 : 5 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 7 : 5 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -369,8 +364,7 @@ void test_int64_axis_pow2_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -384,8 +378,7 @@ void test_int64_axis_pow2_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 : 7 : 1 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 : 7 : 1 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -425,8 +418,7 @@ void test_int64_axis_pow2_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 , 7 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 , 7 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -440,8 +432,7 @@ void test_int64_axis_pow2_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 : 7 : 5 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 : 7 : 5 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -486,8 +477,7 @@ void test_int64_axis_none_to_pow2_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -501,8 +491,7 @@ void test_int64_axis_none_to_pow2_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 : 7 : 1 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 : 7 : 1 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -542,8 +531,7 @@ void test_int64_axis_none_to_pow2_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 , 7 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 , 7 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -557,8 +545,7 @@ void test_int64_axis_none_to_pow2_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 : 7 : 5 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 : 7 : 5 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -617,8 +604,7 @@ void test_int64_axis_pow2_to_none_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 2 : 1 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 2 : 1 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -658,8 +644,7 @@ void test_int64_axis_pow2_to_none_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 , 7 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 , 7 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -673,8 +658,7 @@ void test_int64_axis_pow2_to_none_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 7 : 5 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 7 : 5 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -719,8 +703,7 @@ void test_float64_axis_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -734,10 +717,7 @@ void test_float64_axis_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse({"--benchmark",
|
||||
"TestBench",
|
||||
"--axis",
|
||||
" Floats [ ] = [ 3.5 : 3.6 : 1 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 : 3.6 : 1 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -777,8 +757,7 @@ void test_float64_axis_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 , 4.1 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 , 4.1 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -792,18 +771,14 @@ void test_float64_axis_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse({"--benchmark",
|
||||
"TestBench",
|
||||
"--axis",
|
||||
" Floats [ ] = [ 3.5 : 4.2 : 0.6 ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 : 4.2 : 0.6 ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", "Floats=[3.5:4.2:0.6]"});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", "Floats=[3.5:4.2:0.6]"});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -827,8 +802,7 @@ void test_string_axis_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Strings [ ] = fo br "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = fo br "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -842,8 +816,7 @@ void test_string_axis_single()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -883,8 +856,7 @@ void test_string_axis_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br , baz ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br , baz ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -951,8 +923,7 @@ void test_type_axis_multi()
|
||||
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "TestBench", "--axis", " T [ ] = [ U8, void ] "});
|
||||
parser.parse({"--benchmark", "TestBench", "--axis", " T [ ] = [ U8, void ] "});
|
||||
const auto test = parser_to_state_string(parser);
|
||||
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
|
||||
}
|
||||
@@ -1177,9 +1148,8 @@ void test_axis_before_benchmark()
|
||||
void test_min_samples()
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "DummyBench", "--min-samples", "12345"});
|
||||
const auto& states = parser_to_states(parser);
|
||||
parser.parse({"--benchmark", "DummyBench", "--min-samples", "12345"});
|
||||
const auto &states = parser_to_states(parser);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(states[0].get_min_samples() == 12345);
|
||||
@@ -1188,9 +1158,8 @@ void test_min_samples()
|
||||
void test_min_time()
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "DummyBench", "--min-time", "12345e2"});
|
||||
const auto& states = parser_to_states(parser);
|
||||
parser.parse({"--benchmark", "DummyBench", "--min-time", "12345e2"});
|
||||
const auto &states = parser_to_states(parser);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(std::abs(states[0].get_min_time() - 12345e2) < 1.);
|
||||
@@ -1199,9 +1168,8 @@ void test_min_time()
|
||||
void test_max_noise()
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "DummyBench", "--max-noise", "50.3"});
|
||||
const auto& states = parser_to_states(parser);
|
||||
parser.parse({"--benchmark", "DummyBench", "--max-noise", "50.3"});
|
||||
const auto &states = parser_to_states(parser);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(std::abs(states[0].get_max_noise() - 0.503) < 1.e-4);
|
||||
@@ -1210,9 +1178,8 @@ void test_max_noise()
|
||||
void test_skip_time()
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "DummyBench", "--skip-time", "12345e2"});
|
||||
const auto& states = parser_to_states(parser);
|
||||
parser.parse({"--benchmark", "DummyBench", "--skip-time", "12345e2"});
|
||||
const auto &states = parser_to_states(parser);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(std::abs(states[0].get_skip_time() - 12345e2) < 1.);
|
||||
@@ -1221,9 +1188,8 @@ void test_skip_time()
|
||||
void test_timeout()
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "DummyBench", "--timeout", "12345e2"});
|
||||
const auto& states = parser_to_states(parser);
|
||||
parser.parse({"--benchmark", "DummyBench", "--timeout", "12345e2"});
|
||||
const auto &states = parser_to_states(parser);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(std::abs(states[0].get_timeout() - 12345e2) < 1.);
|
||||
@@ -1232,12 +1198,15 @@ void test_timeout()
|
||||
void test_stopping_criterion()
|
||||
{
|
||||
nvbench::option_parser parser;
|
||||
parser.parse(
|
||||
{"--benchmark", "DummyBench",
|
||||
"--stopping-criterion", "entropy",
|
||||
"--max-angle", "0.42",
|
||||
"--min-r2", "0.6"});
|
||||
const auto& states = parser_to_states(parser);
|
||||
parser.parse({"--benchmark",
|
||||
"DummyBench",
|
||||
"--stopping-criterion",
|
||||
"entropy",
|
||||
"--max-angle",
|
||||
"0.42",
|
||||
"--min-r2",
|
||||
"0.6"});
|
||||
const auto &states = parser_to_states(parser);
|
||||
|
||||
ASSERT(states.size() == 1);
|
||||
ASSERT(states[0].get_stopping_criterion() == "entropy");
|
||||
|
||||
@@ -22,12 +22,9 @@
|
||||
|
||||
void test_basic()
|
||||
{
|
||||
ASSERT((nvbench::range(0, 6) ==
|
||||
std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6}));
|
||||
ASSERT((nvbench::range(0, 6, 1) ==
|
||||
std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6}));
|
||||
ASSERT(
|
||||
(nvbench::range(0, 6, 2) == std::vector<nvbench::int64_t>{0, 2, 4, 6}));
|
||||
ASSERT((nvbench::range(0, 6) == std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6}));
|
||||
ASSERT((nvbench::range(0, 6, 1) == std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6}));
|
||||
ASSERT((nvbench::range(0, 6, 2) == std::vector<nvbench::int64_t>{0, 2, 4, 6}));
|
||||
ASSERT((nvbench::range(0, 6, 3) == std::vector<nvbench::int64_t>{0, 3, 6}));
|
||||
ASSERT((nvbench::range(0, 6, 4) == std::vector<nvbench::int64_t>{0, 4}));
|
||||
ASSERT((nvbench::range(0, 6, 5) == std::vector<nvbench::int64_t>{0, 5}));
|
||||
@@ -37,26 +34,19 @@ void test_basic()
|
||||
void test_result_type()
|
||||
{
|
||||
// All ints should turn into int64 by default:
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range(0ll, 1ll)),
|
||||
std::vector<nvbench::int64_t>>));
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range(0, 1)),
|
||||
std::vector<nvbench::int64_t>>));
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range(0u, 1u)),
|
||||
std::vector<nvbench::int64_t>>));
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range(0ll, 1ll)), std::vector<nvbench::int64_t>>));
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range(0, 1)), std::vector<nvbench::int64_t>>));
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range(0u, 1u)), std::vector<nvbench::int64_t>>));
|
||||
|
||||
// All floats should turn into float64 by default:
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range(0., 1.)),
|
||||
std::vector<nvbench::float64_t>>));
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range(0.f, 1.f)),
|
||||
std::vector<nvbench::float64_t>>));
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range(0., 1.)), std::vector<nvbench::float64_t>>));
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range(0.f, 1.f)), std::vector<nvbench::float64_t>>));
|
||||
|
||||
// Other types may be explicitly specified:
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range<nvbench::float32_t,
|
||||
nvbench::float32_t>(0.f, 1.f)),
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range<nvbench::float32_t, nvbench::float32_t>(0.f, 1.f)),
|
||||
std::vector<nvbench::float32_t>>));
|
||||
ASSERT((std::is_same_v<
|
||||
decltype(nvbench::range<nvbench::int32_t, nvbench::int32_t>(0, 1)),
|
||||
std::vector<nvbench::int32_t>>));
|
||||
ASSERT((std::is_same_v<decltype(nvbench::range<nvbench::int32_t, nvbench::int32_t>(0, 1)),
|
||||
std::vector<nvbench::int32_t>>));
|
||||
}
|
||||
|
||||
void test_fp_tolerance()
|
||||
@@ -68,10 +58,8 @@ void test_fp_tolerance()
|
||||
const nvbench::float32_t stride = 1e-4f;
|
||||
for (std::size_t size = 1; size < 1024; ++size)
|
||||
{
|
||||
const nvbench::float32_t end =
|
||||
start + stride * static_cast<nvbench::float32_t>(size - 1);
|
||||
ASSERT_MSG(nvbench::range(start, end, stride).size() == size,
|
||||
"size={}", size);
|
||||
const nvbench::float32_t end = start + stride * static_cast<nvbench::float32_t>(size - 1);
|
||||
ASSERT_MSG(nvbench::range(start, end, stride).size() == size, "size={}", size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,18 +2,17 @@
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
__global__ void multiply5(const int32_t* __restrict__ a, int32_t* __restrict__ b)
|
||||
{
|
||||
const auto id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
b[id] = 5 * a[id];
|
||||
}
|
||||
__global__ void multiply5(const int32_t *__restrict__ a, int32_t *__restrict__ b)
|
||||
{
|
||||
const auto id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
b[id] = 5 * a[id];
|
||||
}
|
||||
} // namespace
|
||||
|
||||
int main()
|
||||
{
|
||||
{
|
||||
multiply5<<<256, 256>>>(nullptr, nullptr);
|
||||
|
||||
try
|
||||
|
||||
@@ -18,14 +18,13 @@
|
||||
|
||||
#include <nvbench/detail/ring_buffer.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
template <typename T>
|
||||
bool equal(const nvbench::detail::ring_buffer<T> &buffer,
|
||||
const std::vector<T> &reference)
|
||||
bool equal(const nvbench::detail::ring_buffer<T> &buffer, const std::vector<T> &reference)
|
||||
{
|
||||
return std::equal(buffer.begin(), buffer.end(), reference.begin());
|
||||
}
|
||||
|
||||
@@ -16,23 +16,22 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/runner.cuh>
|
||||
|
||||
#include <nvbench/benchmark.cuh>
|
||||
#include <nvbench/callable.cuh>
|
||||
#include <nvbench/runner.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/type_list.cuh>
|
||||
#include <nvbench/type_strings.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "test_asserts.cuh"
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> sort(std::vector<T> &&vec)
|
||||
{
|
||||
@@ -65,21 +64,16 @@ using misc_types = nvbench::type_list<bool, void>;
|
||||
using type_axes = nvbench::type_list<float_types, int_types, misc_types>;
|
||||
|
||||
template <typename FloatT, typename IntT, typename MiscT>
|
||||
void template_no_op_generator(nvbench::state &state,
|
||||
nvbench::type_list<FloatT, IntT, MiscT>)
|
||||
void template_no_op_generator(nvbench::state &state, nvbench::type_list<FloatT, IntT, MiscT>)
|
||||
{
|
||||
ASSERT(nvbench::type_strings<FloatT>::input_string() ==
|
||||
state.get_string("FloatT"));
|
||||
ASSERT(nvbench::type_strings<IntT>::input_string() ==
|
||||
state.get_string("IntT"));
|
||||
ASSERT(nvbench::type_strings<IntT>::input_string() ==
|
||||
state.get_string("IntT"));
|
||||
ASSERT(nvbench::type_strings<FloatT>::input_string() == state.get_string("FloatT"));
|
||||
ASSERT(nvbench::type_strings<IntT>::input_string() == state.get_string("IntT"));
|
||||
ASSERT(nvbench::type_strings<IntT>::input_string() == state.get_string("IntT"));
|
||||
|
||||
// Enum params using non-templated version:
|
||||
no_op_generator(state);
|
||||
}
|
||||
NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator,
|
||||
template_no_op_callable);
|
||||
NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, template_no_op_callable);
|
||||
|
||||
void test_empty()
|
||||
{
|
||||
|
||||
@@ -16,10 +16,9 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/state.cuh>
|
||||
|
||||
#include <nvbench/benchmark.cuh>
|
||||
#include <nvbench/callable.cuh>
|
||||
#include <nvbench/state.cuh>
|
||||
#include <nvbench/summary.cuh>
|
||||
#include <nvbench/types.cuh>
|
||||
|
||||
@@ -43,8 +42,7 @@ struct state_tester : public nvbench::state
|
||||
void set_param(std::string name, T &&value)
|
||||
{
|
||||
this->state::m_axis_values.set_value(std::move(name),
|
||||
nvbench::named_values::value_type{
|
||||
std::forward<T>(value)});
|
||||
nvbench::named_values::value_type{std::forward<T>(value)});
|
||||
}
|
||||
};
|
||||
} // namespace nvbench::detail
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user