Formatting updates.

This commit is contained in:
Allison Piper
2025-04-14 17:26:12 +00:00
parent de36f1a248
commit 3440855dbd
107 changed files with 808 additions and 967 deletions

View File

@@ -41,16 +41,28 @@ CompactNamespaces: false
ContinuationIndentWidth: 2
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<cub'
Priority: 1
- Regex: '^<thrust'
Priority: 2
- Regex: '^<cuda'
Priority: 3
- Regex: '^<nvbench'
Priority: 1
- Regex: '^<cub'
Priority: 2
- Regex: '^<thrust'
Priority: 3
- Regex: '^<cuda/'
Priority: 4
- Regex: '^<[a-z]*>$'
- Regex: '^<cuda'
Priority: 5
- Regex: '^<nvml'
Priority: 6
- Regex: '^<cupti'
Priority: 7
- Regex: '^<nvperf'
Priority: 8
- Regex: '^<nlohmann'
Priority: 9
- Regex: '^<fmt'
Priority: 10
- Regex: '^<[a-z_]*>$'
Priority: 11
IndentCaseLabels: true
IndentPPDirectives: None
IndentWidth: 2

View File

@@ -34,7 +34,7 @@ CCCL uses [Development Containers](https://containers.dev/) to provide consisten
- Alternatively, use the Command Palette to start a Dev Container. Press `Ctrl+Shift+P` to open the Command Palette. Type "Remote-Containers: Reopen in Container" and select it.
![Shows "Reopen in Container" in command pallete.](./img/open_in_container_manual.png)
![Shows "Reopen in Container" in command palette.](./img/open_in_container_manual.png)
4. Select an environment with the desired CTK and host compiler from the list:
@@ -136,7 +136,7 @@ For more information, see the `.devcontainer/make_devcontainers.sh --help` messa
2. Install WSL 2 by running:
```bash
wsl --install
wsl --install
```
This should probably install the Ubuntu distro as the default.
@@ -182,14 +182,14 @@ then run `sudo systemctl restart docker.service`.
10. Open the CCCL cloned repo in VS Code ( `Ctrl + Shift + P `, select `File: Open Folder...` and select the path where your CCCL clone is located).
11. If prompted, choose `Reopen in Container`.
- If you are not prompted just type `Ctrl + Shift + P` and `Dev Containers: Open Folder in Container ...`.
12. Verify that Dev Container was configured properly by running `nvidia-smi` in your Dev Container terminal. For a proper configuration it is important for the steps in [Install prerequisites and VS Code extensions](#prereqs) to be followed in a precise order.
From that point on, the guide aligns with our [existing Dev Containers native Linux guide](https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md) with just one minor potential alteration:
13. If WSL was launched without the X-server enabled, when asked to "authenticate Git with your Github credentials", if you answer **Yes**, the browser might not open automatically, with the following error message.
13. If WSL was launched without the X-server enabled, when asked to "authenticate Git with your Github credentials", if you answer **Yes**, the browser might not open automatically, with the following error message.
> Failed opening a web browser at https://github.com/login/device
exec: "xdg-open,x-www-browser,www-browser,wslview": executable file not found in $PATH

View File

@@ -304,4 +304,3 @@ main() {
}
main "$@"

View File

@@ -14,4 +14,3 @@
#
# Only add commits that are pure formatting changes (e.g. clang-format version changes, etc).
8f1152d4a22287a35be2dde596e3cf86ace8054a # Increase column limit to 100

View File

@@ -65,6 +65,7 @@ long_ext_count = 10
class Target:
"""Represents a single line read for a .ninja_log file."""
def __init__(self, start, end):
"""Creates a target object by passing in the start/end times in seconds
as a float."""
@@ -94,9 +95,9 @@ class Target:
"""
# Allow for modest floating-point errors
epsilon = 0.000002
if (self.weighted_duration > self.Duration() + epsilon):
print('%s > %s?' % (self.weighted_duration, self.Duration()))
assert (self.weighted_duration <= self.Duration() + epsilon)
if self.weighted_duration > self.Duration() + epsilon:
print("%s > %s?" % (self.weighted_duration, self.Duration()))
assert self.weighted_duration <= self.Duration() + epsilon
return self.weighted_duration
def DescribeTargets(self):
@@ -104,10 +105,10 @@ class Target:
# Some build steps generate dozens of outputs - handle them sanely.
# The max_length was chosen so that it can fit most of the long
# single-target names, while minimizing word wrapping.
result = ', '.join(self.targets)
result = ", ".join(self.targets)
max_length = 65
if len(result) > max_length:
result = result[:max_length] + '...'
result = result[:max_length] + "..."
return result
@@ -121,12 +122,11 @@ def ReadTargets(log, show_all):
# targets.
if not header:
return []
assert header == '# ninja log v5\n', \
'unrecognized ninja log version %r' % header
assert header == "# ninja log v5\n", "unrecognized ninja log version %r" % header
targets_dict = {}
last_end_seen = 0.0
for line in log:
parts = line.strip().split('\t')
parts = line.strip().split("\t")
if len(parts) != 5:
# If ninja.exe is rudely halted then the .ninja_log file may be
# corrupt. Silently continue.
@@ -165,17 +165,17 @@ def ReadTargets(log, show_all):
def GetExtension(target, extra_patterns):
"""Return the file extension that best represents a target.
For targets that generate multiple outputs it is important to return a
consistent 'canonical' extension. Ultimately the goal is to group build steps
by type."""
For targets that generate multiple outputs it is important to return a
consistent 'canonical' extension. Ultimately the goal is to group build steps
by type."""
for output in target.targets:
if extra_patterns:
for fn_pattern in extra_patterns.split(';'):
if fnmatch.fnmatch(output, '*' + fn_pattern + '*'):
for fn_pattern in extra_patterns.split(";"):
if fnmatch.fnmatch(output, "*" + fn_pattern + "*"):
return fn_pattern
# Not a true extension, but a good grouping.
if output.endswith('type_mappings'):
extension = 'type_mappings'
if output.endswith("type_mappings"):
extension = "type_mappings"
break
# Capture two extensions if present. For example: file.javac.jar should
@@ -185,26 +185,26 @@ def GetExtension(target, extra_patterns):
extension = ext2 + ext1 # Preserve the order in the file name.
if len(extension) == 0:
extension = '(no extension found)'
extension = "(no extension found)"
if ext1 in ['.pdb', '.dll', '.exe']:
extension = 'PEFile (linking)'
if ext1 in [".pdb", ".dll", ".exe"]:
extension = "PEFile (linking)"
# Make sure that .dll and .exe are grouped together and that the
# .dll.lib files don't cause these to be listed as libraries
break
if ext1 in ['.so', '.TOC']:
extension = '.so (linking)'
if ext1 in [".so", ".TOC"]:
extension = ".so (linking)"
# Attempt to identify linking, avoid identifying as '.TOC'
break
# Make sure .obj files don't get categorized as mojo files
if ext1 in ['.obj', '.o']:
if ext1 in [".obj", ".o"]:
break
# Jars are the canonical output of java targets.
if ext1 == '.jar':
if ext1 == ".jar":
break
# Normalize all mojo related outputs to 'mojo'.
if output.count('.mojom') > 0:
extension = 'mojo'
if output.count(".mojom") > 0:
extension = "mojo"
break
return extension
@@ -229,8 +229,8 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
if target.end > latest:
latest = target.end
total_cpu_time += target.Duration()
task_start_stop_times.append((target.start, 'start', target))
task_start_stop_times.append((target.end, 'stop', target))
task_start_stop_times.append((target.start, "start", target))
task_start_stop_times.append((target.end, "stop", target))
length = latest - earliest
weighted_total = 0.0
@@ -256,10 +256,10 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
if num_running > 0:
# Update the total weighted time up to this moment.
last_weighted_time += (time - last_time) / float(num_running)
if action_name == 'start':
if action_name == "start":
# Record the total weighted task time when this task starts.
running_tasks[target] = last_weighted_time
if action_name == 'stop':
if action_name == "stop":
# Record the change in the total weighted task time while this task
# ran.
weighted_duration = last_weighted_time - running_tasks[target]
@@ -267,24 +267,27 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
weighted_total += weighted_duration
del running_tasks[target]
last_time = time
assert (len(running_tasks) == 0)
assert len(running_tasks) == 0
# Warn if the sum of weighted times is off by more than half a second.
if abs(length - weighted_total) > 500:
print('Warning: Possible corrupt ninja log, results may be '
'untrustworthy. Length = %.3f, weighted total = %.3f' %
(length, weighted_total))
print(
"Warning: Possible corrupt ninja log, results may be "
"untrustworthy. Length = %.3f, weighted total = %.3f"
% (length, weighted_total)
)
# Print the slowest build steps:
print(' Longest build steps:')
print(" Longest build steps:")
if elapsed_time_sorting:
entries.sort(key=lambda x: x.Duration())
else:
entries.sort(key=lambda x: x.WeightedDuration())
for target in entries[-long_count:]:
print(' %8.1f weighted s to build %s (%.1f s elapsed time)' %
(target.WeightedDuration(), target.DescribeTargets(),
target.Duration()))
print(
" %8.1f weighted s to build %s (%.1f s elapsed time)"
% (target.WeightedDuration(), target.DescribeTargets(), target.Duration())
)
# Sum up the time by file extension/type of the output file
count_by_ext = {}
@@ -293,51 +296,56 @@ def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
# Scan through all of the targets to build up per-extension statistics.
for target in entries:
extension = GetExtension(target, extra_step_types)
time_by_ext[extension] = time_by_ext.get(extension,
0) + target.Duration()
weighted_time_by_ext[extension] = weighted_time_by_ext.get(
extension, 0) + target.WeightedDuration()
time_by_ext[extension] = time_by_ext.get(extension, 0) + target.Duration()
weighted_time_by_ext[extension] = (
weighted_time_by_ext.get(extension, 0) + target.WeightedDuration()
)
count_by_ext[extension] = count_by_ext.get(extension, 0) + 1
print(' Time by build-step type:')
print(" Time by build-step type:")
# Copy to a list with extension name and total time swapped, to (time, ext)
if elapsed_time_sorting:
weighted_time_by_ext_sorted = sorted(
(y, x) for (x, y) in time_by_ext.items())
weighted_time_by_ext_sorted = sorted((y, x) for (x, y) in time_by_ext.items())
else:
weighted_time_by_ext_sorted = sorted(
(y, x) for (x, y) in weighted_time_by_ext.items())
(y, x) for (x, y) in weighted_time_by_ext.items()
)
# Print the slowest build target types:
for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]:
print(
' %8.1f s weighted time to generate %d %s files '
'(%1.1f s elapsed time sum)' %
(time, count_by_ext[extension], extension, time_by_ext[extension]))
" %8.1f s weighted time to generate %d %s files "
"(%1.1f s elapsed time sum)"
% (time, count_by_ext[extension], extension, time_by_ext[extension])
)
print(' %.1f s weighted time (%.1f s elapsed time sum, %1.1fx '
'parallelism)' %
(length, total_cpu_time, total_cpu_time * 1.0 / length))
print(' %d build steps completed, average of %1.2f/s' %
(len(entries), len(entries) / (length)))
print(
" %.1f s weighted time (%.1f s elapsed time sum, %1.1fx "
"parallelism)" % (length, total_cpu_time, total_cpu_time * 1.0 / length)
)
print(
" %d build steps completed, average of %1.2f/s"
% (len(entries), len(entries) / (length))
)
def main():
log_file = '.ninja_log'
metrics_file = 'siso_metrics.json'
log_file = ".ninja_log"
metrics_file = "siso_metrics.json"
parser = argparse.ArgumentParser()
parser.add_argument('-C', dest='build_directory', help='Build directory.')
parser.add_argument("-C", dest="build_directory", help="Build directory.")
parser.add_argument(
'-s',
'--step-types',
help='semicolon separated fnmatch patterns for build-step grouping')
"-s",
"--step-types",
help="semicolon separated fnmatch patterns for build-step grouping",
)
parser.add_argument(
'-e',
'--elapsed_time_sorting',
"-e",
"--elapsed_time_sorting",
default=False,
action='store_true',
help='Sort output by elapsed time instead of weighted time')
parser.add_argument('--log-file',
help="specific ninja log file to analyze.")
action="store_true",
help="Sort output by elapsed time instead of weighted time",
)
parser.add_argument("--log-file", help="specific ninja log file to analyze.")
args, _extra_args = parser.parse_known_args()
if args.build_directory:
log_file = os.path.join(args.build_directory, log_file)
@@ -348,34 +356,35 @@ def main():
# Offer a convenient way to add extra step types automatically,
# including when this script is run by autoninja. get() returns None if
# the variable isn't set.
args.step_types = os.environ.get('chromium_step_types')
args.step_types = os.environ.get("chromium_step_types")
if args.step_types:
# Make room for the extra build types.
global long_ext_count
long_ext_count += len(args.step_types.split(';'))
long_ext_count += len(args.step_types.split(";"))
if os.path.exists(metrics_file):
# Automatically handle summarizing siso builds.
cmd = ['siso.bat' if 'win32' in sys.platform else 'siso']
cmd.extend(['metrics', 'summary'])
cmd = ["siso.bat" if "win32" in sys.platform else "siso"]
cmd.extend(["metrics", "summary"])
if args.build_directory:
cmd.extend(['-C', args.build_directory])
cmd.extend(["-C", args.build_directory])
if args.step_types:
cmd.extend(['--step_types', args.step_types])
cmd.extend(["--step_types", args.step_types])
if args.elapsed_time_sorting:
cmd.append('--elapsed_time_sorting')
cmd.append("--elapsed_time_sorting")
subprocess.run(cmd)
else:
try:
with open(log_file, 'r') as log:
with open(log_file, "r") as log:
entries = ReadTargets(log, False)
if entries:
SummarizeEntries(entries, args.step_types,
args.elapsed_time_sorting)
SummarizeEntries(
entries, args.step_types, args.elapsed_time_sorting
)
except IOError:
print('Log file %r not found, no build summary created.' % log_file)
print("Log file %r not found, no build summary created." % log_file)
return errno.ENOENT
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main())

View File

@@ -37,5 +37,5 @@ if [ $requests_diff -eq 0 ]; then
else
hit_rate=$(awk -v hits=$hits_diff -v requests=$requests_diff 'BEGIN {printf "%.2f", hits/requests * 100}')
echo "sccache hit rate: $hit_rate%" >&2
echo "$hit_rate"
echo "$hit_rate"
fi

View File

@@ -1,4 +1,4 @@
# Detect the langauge standards supported by the current compilers.
# Detect the language standards supported by the current compilers.
#
# Usage: detect_supported_cxx_standards(<var_prefix> <lang> <standards>)
#

View File

@@ -14,7 +14,7 @@
# limitations under the License.
# Passes all args directly to execute_process while setting up the following
# results variables and propogating them to the caller's scope:
# results variables and propagating them to the caller's scope:
#
# - nvbench_process_exit_code
# - nvbench_process_stdout

View File

@@ -9,9 +9,9 @@
// a potential macro collision and halts.
//
// Hacky way to build a string, but it works on all tested platforms.
#define NVBench_MACRO_CHECK(MACRO, HEADER) \
NVBench_MACRO_CHECK_IMPL(Identifier MACRO should not be used from NVBench \
headers due to conflicts with HEADER macros.)
#define NVBench_MACRO_CHECK(MACRO, HEADER) \
NVBench_MACRO_CHECK_IMPL( \
Identifier MACRO should not be used from NVBench headers due to conflicts with HEADER macros.)
// Use raw platform checks instead of the NVBench_HOST_COMPILER macros since we
// don't want to #include any headers other than the one being tested.
@@ -34,8 +34,8 @@
// library implementations unconditionally `#undef` these macros, which then
// causes random failures later.
// Leaving these commented out as a warning: Here be dragons.
//#define min(...) NVBench_MACRO_CHECK('min', windows.h)
//#define max(...) NVBench_MACRO_CHECK('max', windows.h)
// #define min(...) NVBench_MACRO_CHECK('min', windows.h)
// #define max(...) NVBench_MACRO_CHECK('max', windows.h)
// termios.h conflicts (NVIDIA/thrust#1547)
#define B0 NVBench_MACRO_CHECK("B0", termios.h)

View File

@@ -90,7 +90,7 @@
before any `--benchmark` arguments.
* `--stopping-criterion <criterion>`
* After `--min-samples` is satisfied, use `<criterion>` to detect if enough
* After `--min-samples` is satisfied, use `<criterion>` to detect if enough
samples were collected.
* Only applies to Cold measurements.
* Default is stdrel (`--stopping-criterion stdrel`)

View File

@@ -24,37 +24,33 @@
template <int ItemsPerThread>
__global__ void kernel(std::size_t stride,
std::size_t elements,
const nvbench::int32_t * __restrict__ in,
const nvbench::int32_t *__restrict__ in,
nvbench::int32_t *__restrict__ out)
{
const std::size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const std::size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const std::size_t step = gridDim.x * blockDim.x;
for (std::size_t i = stride * tid;
i < stride * elements;
i += stride * step)
for (std::size_t i = stride * tid; i < stride * elements; i += stride * step)
{
for (int j = 0; j < ItemsPerThread; j++)
{
const auto read_id = (ItemsPerThread * i + j) % elements;
const auto read_id = (ItemsPerThread * i + j) % elements;
const auto write_id = tid + j * elements;
out[write_id] = in[read_id];
out[write_id] = in[read_id];
}
}
}
// `throughput_bench` copies a 128 MiB buffer of int32_t, and reports throughput
// and cache hit rates.
//
// Calling state.collect_*() enables particular metric collection if nvbench
// was built with CUPTI support (CMake option: -DNVBench_ENABLE_CUPTI=ON).
template <int ItemsPerThread>
void throughput_bench(nvbench::state &state,
nvbench::type_list<nvbench::enum_type<ItemsPerThread>>)
void throughput_bench(nvbench::state &state, nvbench::type_list<nvbench::enum_type<ItemsPerThread>>)
{
// Allocate input data:
const std::size_t stride = static_cast<std::size_t>(state.get_int64("Stride"));
const std::size_t stride = static_cast<std::size_t>(state.get_int64("Stride"));
const std::size_t elements = 128 * 1024 * 1024 / sizeof(nvbench::int32_t);
thrust::device_vector<nvbench::int32_t> input(elements);
thrust::device_vector<nvbench::int32_t> output(elements * ItemsPerThread);
@@ -72,12 +68,11 @@ void throughput_bench(nvbench::state &state,
static_cast<int>((elements + threads_in_block - 1) / threads_in_block);
state.exec([&](nvbench::launch &launch) {
kernel<ItemsPerThread>
<<<blocks_in_grid, threads_in_block, 0, launch.get_stream()>>>(
stride,
elements,
thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()));
kernel<ItemsPerThread><<<blocks_in_grid, threads_in_block, 0, launch.get_stream()>>>(
stride,
elements,
thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()));
});
}

View File

@@ -71,18 +71,16 @@ void copy_sweep_grid_shape(nvbench::state &state)
thrust::device_vector<nvbench::int32_t> in(num_values, 0);
thrust::device_vector<nvbench::int32_t> out(num_values, 0);
state.exec(
[block_size,
num_blocks,
num_values,
in_ptr = thrust::raw_pointer_cast(in.data()),
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<num_blocks, block_size, 0, launch.get_stream()>>>(
in_ptr,
out_ptr,
num_values);
});
state.exec([block_size,
num_blocks,
num_values,
in_ptr = thrust::raw_pointer_cast(in.data()),
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
(void)num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<num_blocks, block_size, 0, launch.get_stream()>>>(in_ptr,
out_ptr,
num_values);
});
}
NVBENCH_BENCH(copy_sweep_grid_shape)
// Every second power of two from 64->1024:
@@ -107,15 +105,12 @@ void copy_type_sweep(nvbench::state &state, nvbench::type_list<ValueType>)
thrust::device_vector<ValueType> in(num_values, 0);
thrust::device_vector<ValueType> out(num_values, 0);
state.exec(
[num_values,
in_ptr = thrust::raw_pointer_cast(in.data()),
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr,
out_ptr,
num_values);
});
state.exec([num_values,
in_ptr = thrust::raw_pointer_cast(in.data()),
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
(void)num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values);
});
}
// Define a type_list to use for the type axis:
using cts_types = nvbench::type_list<nvbench::uint8_t,
@@ -131,11 +126,10 @@ NVBENCH_BENCH_TYPES(copy_type_sweep, NVBENCH_TYPE_AXES(cts_types));
// Convert 64 MiB of InputTypes to OutputTypes, represented with various
// value_types.
template <typename InputType, typename OutputType>
void copy_type_conversion_sweep(nvbench::state &state,
nvbench::type_list<InputType, OutputType>)
void copy_type_conversion_sweep(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
{
// Optional: Skip narrowing conversions.
if constexpr(sizeof(InputType) > sizeof(OutputType))
if constexpr (sizeof(InputType) > sizeof(OutputType))
{
state.skip("Narrowing conversion: sizeof(InputType) > sizeof(OutputType).");
return;
@@ -154,15 +148,12 @@ void copy_type_conversion_sweep(nvbench::state &state,
thrust::device_vector<InputType> in(num_values, 0);
thrust::device_vector<OutputType> out(num_values, 0);
state.exec(
[num_values,
in_ptr = thrust::raw_pointer_cast(in.data()),
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr,
out_ptr,
num_values);
});
state.exec([num_values,
in_ptr = thrust::raw_pointer_cast(in.data()),
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
(void)num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values);
});
}
// Optional: Skip when InputType == OutputType. This approach avoids
// instantiating the benchmark at all.
@@ -178,6 +169,5 @@ using ctcs_types = nvbench::type_list<nvbench::int8_t,
nvbench::float32_t,
nvbench::int64_t,
nvbench::float64_t>;
NVBENCH_BENCH_TYPES(copy_type_conversion_sweep,
NVBENCH_TYPE_AXES(ctcs_types, ctcs_types))
NVBENCH_BENCH_TYPES(copy_type_conversion_sweep, NVBENCH_TYPE_AXES(ctcs_types, ctcs_types))
.set_type_axes_names({"In", "Out"});

View File

@@ -36,10 +36,7 @@ public:
protected:
// Setup the criterion in the `do_initialize()` method:
virtual void do_initialize() override
{
m_num_samples = 0;
}
virtual void do_initialize() override { m_num_samples = 0; }
// Process new measurements in the `add_measurement()` method:
virtual void do_add_measurement(nvbench::float64_t /* measurement */) override
@@ -52,7 +49,6 @@ protected:
{
return m_num_samples >= m_params.get_int64("max-samples");
}
};
// Register the criterion with NVBench:
@@ -71,7 +67,7 @@ void throughput_bench(nvbench::state &state)
state.add_global_memory_writes<nvbench::int32_t>(num_values);
state.exec(nvbench::exec_tag::no_batch, [&input, &output, num_values](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused...
(void)num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()),

View File

@@ -17,7 +17,6 @@
*/
#include <nvbench/nvbench.cuh>
#include <nvbench/test_kernels.cuh>
// Enum to use as parameter axis:
@@ -68,12 +67,10 @@ void runtime_enum_sweep_string(nvbench::state &state)
// Create inputs, etc, configure runtime kernel parameters, etc.
// Just a dummy kernel.
state.exec([](nvbench::launch &launch) {
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
});
state.exec(
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
}
NVBENCH_BENCH(runtime_enum_sweep_string)
.add_string_axis("MyEnum", {"A", "B", "C"});
NVBENCH_BENCH(runtime_enum_sweep_string).add_string_axis("MyEnum", {"A", "B", "C"});
//==============================================================================
// Sweep through enum values at runtime using an int64 axis.
@@ -97,9 +94,8 @@ void runtime_enum_sweep_int64(nvbench::state &state)
// Create inputs, etc, configure runtime kernel parameters, etc.
// Just a dummy kernel.
state.exec([](nvbench::launch &launch) {
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
});
state.exec(
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
}
NVBENCH_BENCH(runtime_enum_sweep_int64)
.add_int64_axis("MyEnum",
@@ -178,12 +174,10 @@ void compile_time_enum_sweep(nvbench::state &state,
// Template parameters, static dispatch, etc.
// Just a dummy kernel.
state.exec([](nvbench::launch &launch) {
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
});
state.exec(
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
}
using MyEnumList =
nvbench::enum_type_list<MyEnum::ValueA, MyEnum::ValueB, MyEnum::ValueC>;
using MyEnumList = nvbench::enum_type_list<MyEnum::ValueA, MyEnum::ValueB, MyEnum::ValueC>;
NVBENCH_BENCH_TYPES(compile_time_enum_sweep, NVBENCH_TYPE_AXES(MyEnumList))
.set_type_axes_names({"MyEnum"});
@@ -199,16 +193,14 @@ NVBENCH_BENCH_TYPES(compile_time_enum_sweep, NVBENCH_TYPE_AXES(MyEnumList))
// * `-12` (struct std::integral_constant<int,-12>)
// ```
template <nvbench::int32_t IntValue>
void compile_time_int_sweep(nvbench::state &state,
nvbench::type_list<nvbench::enum_type<IntValue>>)
void compile_time_int_sweep(nvbench::state &state, nvbench::type_list<nvbench::enum_type<IntValue>>)
{
// Use IntValue in compile time contexts.
// Template parameters, static dispatch, etc.
// Just a dummy kernel.
state.exec([](nvbench::launch &launch) {
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
});
state.exec(
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
}
using MyInts = nvbench::enum_type_list<0, 16, 4096, -12>;
NVBENCH_BENCH_TYPES(compile_time_int_sweep, NVBENCH_TYPE_AXES(MyInts))

View File

@@ -53,9 +53,7 @@ void sequence_bench(nvbench::state &state)
// nvbench::exec_tag::sync indicates that this will implicitly sync:
state.exec(nvbench::exec_tag::sync, [&data](nvbench::launch &launch) {
thrust::sequence(thrust::device.on(launch.get_stream()),
data.begin(),
data.end());
thrust::sequence(thrust::device.on(launch.get_stream()), data.begin(), data.end());
});
}
NVBENCH_BENCH(sequence_bench);

View File

@@ -23,8 +23,8 @@
// Thrust simplifies memory management, etc:
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/sequence.h>
// mod2_inplace performs an in-place mod2 over every element in `data`. `data`
@@ -54,7 +54,7 @@ void mod2_inplace(nvbench::state &state)
state.exec(nvbench::exec_tag::timer,
// Lambda now takes a `timer` argument:
[&input, &data, num_values](nvbench::launch &launch, auto &timer) {
(void) num_values; // clang thinks this is unused...
(void)num_values; // clang thinks this is unused...
// Reset working data:
thrust::copy(thrust::device.on(launch.get_stream()),

View File

@@ -72,14 +72,12 @@ NVBENCH_BENCH(runtime_skip)
// Two type axes are swept, but configurations where InputType == OutputType are
// skipped.
template <typename InputType, typename OutputType>
void skip_overload(nvbench::state &state,
nvbench::type_list<InputType, OutputType>)
void skip_overload(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
{
// This is a contrived example that focuses on the skip overloads, so this is
// just a sleep kernel:
state.exec([](nvbench::launch &launch) {
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
});
state.exec(
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
}
// Overload of skip_overload that is called when InputType == OutputType.
template <typename T>
@@ -107,9 +105,8 @@ skip_sfinae(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
{
// This is a contrived example that focuses on the skip overloads, so this is
// just a sleep kernel:
state.exec([](nvbench::launch &launch) {
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
});
state.exec(
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
}
// Enable this overload if InputType is larger than OutputType
template <typename InputType, typename OutputType>
@@ -119,10 +116,8 @@ skip_sfinae(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
state.skip("sizeof(InputType) > sizeof(OutputType).");
}
// The same type_list is used for both inputs/outputs.
using sn_types = nvbench::type_list<nvbench::int8_t,
nvbench::int16_t,
nvbench::int32_t,
nvbench::int64_t>;
using sn_types =
nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;
// Setup benchmark:
NVBENCH_BENCH_TYPES(skip_sfinae, NVBENCH_TYPE_AXES(sn_types, sn_types))
.set_type_axes_names({"In", "Out"});

View File

@@ -52,7 +52,7 @@ void stream_bench(nvbench::state &state)
state.set_cuda_stream(nvbench::make_cuda_stream_view(default_stream));
state.exec([&input, &output, num_values](nvbench::launch &) {
(void) num_values; // clang thinks this is unused...
(void)num_values; // clang thinks this is unused...
copy(thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()),
num_values);

View File

@@ -26,8 +26,8 @@
void summary_example(nvbench::state &state)
{
// Fetch parameters and compute duration in seconds:
const auto ms = static_cast<nvbench::float64_t>(state.get_int64("ms"));
const auto us = static_cast<nvbench::float64_t>(state.get_int64("us"));
const auto ms = static_cast<nvbench::float64_t>(state.get_int64("ms"));
const auto us = static_cast<nvbench::float64_t>(state.get_int64("us"));
const auto duration = ms * 1e-3 + us * 1e-6;
// Add a new column to the summary table with the derived duration used by the benchmark.

View File

@@ -51,7 +51,7 @@ void throughput_bench(nvbench::state &state)
state.add_global_memory_writes<nvbench::int32_t>(num_values);
state.exec([&input, &output, num_values](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused...
(void)num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()),

View File

@@ -1,20 +1,20 @@
/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/main.cuh>
@@ -24,7 +24,7 @@ int main(int argc, char const *const *argv)
try
{
// If no args, substitute a new argv that prints the version
std::vector<const char*> alt_argv;
std::vector<const char *> alt_argv;
if (argc == 1)
{
alt_argv.push_back("--version");
@@ -36,7 +36,7 @@ try
NVBENCH_CUDA_CALL(cudaDeviceReset());
return 0;
}
catch (std::exception & e)
catch (std::exception &e)
{
std::cerr << "\nNVBench encountered an error:\n\n" << e.what() << "\n";
return 1;

View File

@@ -19,13 +19,13 @@
#include <nvbench/axes_metadata.cuh>
#include <nvbench/detail/throw.cuh>
#include <fmt/format.h>
#include <fmt/ranges.h>
#include <algorithm>
#include <cassert>
#include <stdexcept>
#include <fmt/format.h>
#include <fmt/ranges.h>
namespace nvbench
{

View File

@@ -16,7 +16,7 @@
* limitations under the License.
*/
#include "axis_base.cuh"
#include <nvbench/axis_base.cuh>
namespace nvbench
{

View File

@@ -18,9 +18,8 @@
#pragma once
#include <nvbench/benchmark_base.cuh>
#include <nvbench/axes_metadata.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/runner.cuh>
#include <nvbench/type_list.cuh>

View File

@@ -45,7 +45,7 @@ struct benchmark_manager
* benchmarks should be done here to avoid creating a CUDA context before we configure the CUDA
* environment in `main`.
*/
void initialize();
void initialize();
/**
* Register a new benchmark.

View File

@@ -17,9 +17,8 @@
*/
#include <nvbench/benchmark_manager.cuh>
#include <nvbench/device_manager.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/device_manager.cuh>
#include <fmt/format.h>
@@ -37,8 +36,8 @@ benchmark_manager &benchmark_manager::get()
void benchmark_manager::initialize()
{
const auto& mgr = device_manager::get();
for (auto& bench : m_benchmarks)
const auto &mgr = device_manager::get();
for (auto &bench : m_benchmarks)
{
if (!bench->get_is_cpu_only())
{

View File

@@ -17,12 +17,10 @@
*/
#include <nvbench/blocking_kernel.cuh>
#include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_stream.cuh>
#include <nvbench/types.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/types.cuh>
#include <cuda/std/chrono>

View File

@@ -24,7 +24,6 @@
#include <nvbench/types.cuh>
#include <memory>
#include <unordered_map>
namespace nvbench
@@ -40,14 +39,14 @@ public:
/**
* @return The singleton criterion_manager instance.
*/
static criterion_manager& get();
static criterion_manager &get();
/**
* Register a new stopping criterion.
*/
nvbench::stopping_criterion_base& add(std::unique_ptr<nvbench::stopping_criterion_base> criterion);
nvbench::stopping_criterion_base& get_criterion(const std::string& name);
const nvbench::stopping_criterion_base& get_criterion(const std::string& name) const;
nvbench::stopping_criterion_base &add(std::unique_ptr<nvbench::stopping_criterion_base> criterion);
nvbench::stopping_criterion_base &get_criterion(const std::string &name);
const nvbench::stopping_criterion_base &get_criterion(const std::string &name) const;
using params_description = std::vector<std::pair<std::string, nvbench::named_values::type>>;
params_description get_params_description() const;

View File

@@ -41,7 +41,7 @@ criterion_manager &criterion_manager::get()
return registry;
}
stopping_criterion_base& criterion_manager::get_criterion(const std::string& name)
stopping_criterion_base &criterion_manager::get_criterion(const std::string &name)
{
auto iter = m_map.find(name);
if (iter == m_map.end())
@@ -51,7 +51,8 @@ stopping_criterion_base& criterion_manager::get_criterion(const std::string& nam
return *iter->second.get();
}
const nvbench::stopping_criterion_base& criterion_manager::get_criterion(const std::string& name) const
const nvbench::stopping_criterion_base &
criterion_manager::get_criterion(const std::string &name) const
{
auto iter = m_map.find(name);
if (iter == m_map.end())
@@ -69,8 +70,7 @@ stopping_criterion_base &criterion_manager::add(std::unique_ptr<stopping_criteri
if (!success)
{
NVBENCH_THROW(std::runtime_error,
"Stopping criterion \"{}\" is already registered.", name);
NVBENCH_THROW(std::runtime_error, "Stopping criterion \"{}\" is already registered.", name);
}
return *it->second.get();

View File

@@ -16,14 +16,12 @@
* limitations under the License.
*/
#include <nvbench/csv_printer.cuh>
#include <nvbench/axes_metadata.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/csv_printer.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/internal/table_builder.cuh>
#include <nvbench/summary.cuh>
#include <fmt/format.h>
@@ -169,7 +167,10 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches)
std::size_t remaining = table.m_columns.size();
for (const auto &col : table.m_columns)
{
fmt::format_to(std::back_inserter(buffer), "{}{}", col.rows[i], (--remaining == 0) ? "" : ",");
fmt::format_to(std::back_inserter(buffer),
"{}{}",
col.rows[i],
(--remaining == 0) ? "" : ",");
}
fmt::format_to(std::back_inserter(buffer), "\n");
}

View File

@@ -19,7 +19,6 @@
#pragma once
#include <nvbench/cuda_call.cuh>
#include <nvbench/types.cuh>
#include <cuda_runtime_api.h>

View File

@@ -17,7 +17,6 @@
*/
#include <nvbench/cupti_profiler.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/device_info.cuh>
@@ -54,7 +53,9 @@ void nvpw_call(const NVPA_Status status)
{
if (status != NVPA_STATUS_SUCCESS)
{
NVBENCH_THROW(std::runtime_error, "NVPW call returned error: {}", static_cast<std::underlying_type_t<NVPA_Status>>(status));
NVBENCH_THROW(std::runtime_error,
"NVPW call returned error: {}",
static_cast<std::underlying_type_t<NVPA_Status>>(status));
}
}

View File

@@ -18,9 +18,9 @@
#pragma once
#include <nvbench/types.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/ring_buffer.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/types.cuh>
#include <vector>
@@ -38,7 +38,7 @@ class entropy_criterion final : public stopping_criterion_base
nvbench::detail::ring_buffer<nvbench::float64_t> m_entropy_tracker{299};
// Used to avoid re-allocating temporary memory
std::vector<nvbench::float64_t> m_probabilities;
std::vector<nvbench::float64_t> m_probabilities;
nvbench::float64_t compute_entropy();
@@ -49,7 +49,6 @@ protected:
virtual void do_initialize() override;
virtual void do_add_measurement(nvbench::float64_t measurement) override;
virtual bool do_is_finished() override;
};
} // namespace nvbench::detail

View File

@@ -21,7 +21,6 @@
#include <cmath>
namespace nvbench::detail
{
@@ -40,7 +39,7 @@ void entropy_criterion::do_initialize()
m_freq_tracker.clear();
}
nvbench::float64_t entropy_criterion::compute_entropy()
nvbench::float64_t entropy_criterion::compute_entropy()
{
const std::size_t n = m_freq_tracker.size();
if (n == 0)
@@ -70,15 +69,15 @@ void entropy_criterion::do_add_measurement(nvbench::float64_t measurement)
m_total_cuda_time += measurement;
{
auto key = measurement;
auto key = measurement;
constexpr bool bin_keys = false;
if (bin_keys)
if (bin_keys)
{
const auto resolution_us = 0.5;
const auto resulution_s = resolution_us / 1'000'000;
const auto epsilon = resulution_s * 2;
key = std::round(key / epsilon) * epsilon;
const auto resulution_s = resolution_us / 1 '000' 000;
const auto epsilon = resulution_s * 2;
key = std::round(key / epsilon) * epsilon;
}
// This approach is about 3x faster than `std::{unordered_,}map`
@@ -120,7 +119,7 @@ bool entropy_criterion::do_is_finished()
const auto [slope, intercept] = statistics::compute_linear_regression(begin, end, mean);
if (statistics::slope2deg(slope) > m_params.get_float64("max-angle"))
if (statistics::slope2deg(slope) > m_params.get_float64("max-angle"))
{
return false;
}

View File

@@ -25,13 +25,13 @@
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
#include <fmt/format.h>
#include <algorithm>
#include <chrono>
#include <limits>
#include <thread>
#include <fmt/format.h>
namespace nvbench::detail
{

View File

@@ -18,8 +18,6 @@
#pragma once
#include <cuda_runtime.h>
#include <nvbench/blocking_kernel.cuh>
#include <nvbench/cpu_timer.cuh>
#include <nvbench/cuda_call.cuh>
@@ -32,12 +30,13 @@
#include <nvbench/exec_tag.cuh>
#include <nvbench/launch.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/types.cuh>
#include <cuda_runtime.h>
#include <utility>
#include <vector>
#include "nvbench/types.cuh"
namespace nvbench
{

View File

@@ -19,13 +19,12 @@
#pragma once
#include <nvbench/cpu_timer.cuh>
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
#include <nvbench/detail/statistics.cuh>
#include <nvbench/exec_tag.cuh>
#include <nvbench/launch.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
#include <nvbench/detail/statistics.cuh>
#include <utility>
#include <vector>
@@ -66,7 +65,7 @@ protected:
nvbench::cpu_timer m_walltime_timer;
nvbench::criterion_params m_criterion_params;
nvbench::stopping_criterion_base& m_stopping_criterion;
nvbench::stopping_criterion_base &m_stopping_criterion;
bool m_run_once{false};

View File

@@ -24,11 +24,11 @@
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
#include <fmt/format.h>
#include <algorithm>
#include <limits>
#include <fmt/format.h>
namespace nvbench::detail
{
@@ -36,7 +36,8 @@ measure_cpu_only_base::measure_cpu_only_base(state &exec_state)
: m_state{exec_state}
, m_launch(m_state.get_cuda_stream())
, m_criterion_params{exec_state.get_criterion_params()}
, m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(exec_state.get_stopping_criterion())}
, m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(
exec_state.get_stopping_criterion())}
, m_run_once{exec_state.get_run_once()}
, m_min_samples{exec_state.get_min_samples()}
, m_skip_time{exec_state.get_skip_time()}
@@ -72,7 +73,7 @@ void measure_cpu_only_base::run_trials_prologue() { m_walltime_timer.start(); }
void measure_cpu_only_base::record_measurements()
{
// Update and record timers and counters:
const auto cur_cpu_time = m_cpu_timer.get_duration();
const auto cur_cpu_time = m_cpu_timer.get_duration();
m_min_cpu_time = std::min(m_min_cpu_time, cur_cpu_time);
m_max_cpu_time = std::max(m_max_cpu_time, cur_cpu_time);
@@ -188,8 +189,7 @@ void measure_cpu_only_base::generate_summaries()
auto &summ = m_state.add_summary("nv/cpu_only/bw/global/bytes_per_second");
summ.set_string("name", "GlobalMem BW");
summ.set_string("hint", "byte_rate");
summ.set_string("description",
"Number of bytes read/written per second.");
summ.set_string("description", "Number of bytes read/written per second.");
summ.set_float64("value", avg_used_gmem_bw);
}
} // bandwidth
@@ -210,9 +210,9 @@ void measure_cpu_only_base::generate_summaries()
if (m_max_time_exceeded)
{
const auto timeout = m_walltime_timer.get_duration();
const auto timeout = m_walltime_timer.get_duration();
const auto max_noise = m_criterion_params.get_float64("max-noise");
const auto min_time = m_criterion_params.get_float64("min-time");
const auto min_time = m_criterion_params.get_float64("min-time");
if (cpu_noise > max_noise)
{

View File

@@ -24,13 +24,12 @@
#include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_timer.cuh>
#include <nvbench/cupti_profiler.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/exec_tag.cuh>
#include <nvbench/launch.cuh>
#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh>
#include <nvbench/detail/l2flush.cuh>
#include <nvbench/detail/statistics.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/exec_tag.cuh>
#include <nvbench/launch.cuh>
#include <cuda_runtime.h>

View File

@@ -16,9 +16,8 @@
* limitations under the License.
*/
#include <nvbench/detail/measure_hot.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/detail/measure_hot.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/printer_base.cuh>

View File

@@ -19,12 +19,11 @@
#pragma once
#include <nvbench/config.cuh>
#include <nvbench/detail/statistics.cuh>
#include <cassert>
#include <cstddef>
#include <iterator>
#include <cassert>
#include <vector>
namespace nvbench::detail
@@ -76,14 +75,14 @@ public:
return temp;
}
ring_buffer_iterator operator+(difference_type n) const
{
return ring_buffer_iterator(m_index + n, m_capacity, m_ptr);
ring_buffer_iterator operator+(difference_type n) const
{
return ring_buffer_iterator(m_index + n, m_capacity, m_ptr);
}
ring_buffer_iterator operator-(difference_type n) const
{
return ring_buffer_iterator(m_index - n, m_capacity, m_ptr);
ring_buffer_iterator operator-(difference_type n) const
{
return ring_buffer_iterator(m_index - n, m_capacity, m_ptr);
}
difference_type operator-(const ring_buffer_iterator &other) const
@@ -121,13 +120,9 @@ private:
std::size_t m_index{0};
bool m_full{false};
std::size_t get_front_index() const
{
return m_full ? m_index : 0;
}
std::size_t get_front_index() const { return m_full ? m_index : 0; }
public:
/**
* Create a new ring buffer with the requested capacity.
*/

View File

@@ -16,15 +16,13 @@
* limitations under the License.
*/
#include <nvbench/detail/state_generator.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/detail/state_generator.cuh>
#include <nvbench/detail/transform_reduce.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/named_values.cuh>
#include <nvbench/type_axis.cuh>
#include <nvbench/detail/transform_reduce.cuh>
#include <algorithm>
#include <cassert>
#include <functional>
@@ -165,7 +163,7 @@ void state_generator::build_axis_configs()
config.set_string(axis_info.axis, axis.get_input_string(axis_info.index));
}
} // type_si
} // type_axis_config generation
} // type_axis_config generation
// non_type_axis_config generation
{
@@ -201,9 +199,9 @@ void state_generator::build_axis_configs()
axes.get_string_axis(axis_info.axis).get_value(axis_info.index));
break;
} // switch (type)
} // for (axis_info : current_indices)
} // for non_type_sg configs
} // non_type_axis_config generation
} // for (axis_info : current_indices)
} // for non_type_sg configs
} // non_type_axis_config generation
}
void state_generator::build_states()

View File

@@ -26,12 +26,10 @@
#include <iterator>
#include <limits>
#include <numeric>
#include <cmath>
#include <type_traits>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#define M_PI 3.14159265358979323846
#endif
namespace nvbench::detail::statistics
@@ -154,7 +152,7 @@ nvbench::float64_t compute_r2(It first,
for (std::size_t i = 0; i < n; ++i, ++first)
{
const nvbench::float64_t y = *first;
const nvbench::float64_t y = *first;
const nvbench::float64_t y_pred = slope * static_cast<nvbench::float64_t>(i) + intercept;
ss_tot += (y - mean_y) * (y - mean_y);
@@ -179,19 +177,10 @@ compute_r2(It first, It last, nvbench::float64_t slope, nvbench::float64_t inter
return compute_r2(first, last, compute_mean(first, last), slope, intercept);
}
inline nvbench::float64_t rad2deg(nvbench::float64_t rad)
{
return rad * 180.0 / M_PI;
}
inline nvbench::float64_t rad2deg(nvbench::float64_t rad) { return rad * 180.0 / M_PI; }
inline nvbench::float64_t slope2rad(nvbench::float64_t slope)
{
return std::atan2(slope, 1.0);
}
inline nvbench::float64_t slope2rad(nvbench::float64_t slope) { return std::atan2(slope, 1.0); }
inline nvbench::float64_t slope2deg(nvbench::float64_t slope)
{
return rad2deg(slope2rad(slope));
}
inline nvbench::float64_t slope2deg(nvbench::float64_t slope) { return rad2deg(slope2rad(slope)); }
} // namespace nvbench::detail::statistics

View File

@@ -18,9 +18,9 @@
#pragma once
#include <nvbench/types.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/ring_buffer.cuh>
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/types.cuh>
#include <vector>

View File

@@ -29,7 +29,7 @@ stdrel_criterion::stdrel_criterion()
void stdrel_criterion::do_initialize()
{
m_total_samples = 0;
m_total_samples = 0;
m_total_cuda_time = 0.0;
m_cuda_times.clear();
m_noise_tracker.clear();
@@ -46,7 +46,7 @@ void stdrel_criterion::do_add_measurement(nvbench::float64_t measurement)
const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(),
m_cuda_times.cend(),
mean_cuda_time);
const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time;
if (std::isfinite(cuda_rel_stdev))
{
m_noise_tracker.push_back(cuda_rel_stdev);

View File

@@ -19,6 +19,7 @@
#pragma once
#include <fmt/format.h>
#include <stdexcept>
#define NVBENCH_THROW(exception_type, format_str, ...) \

View File

@@ -16,13 +16,13 @@
* limitations under the License.
*/
#include <cuda_runtime.h>
#include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_stream.cuh>
#include <nvbench/detail/timestamps_kernel.cuh>
#include <nvbench/types.cuh>
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>
@@ -71,12 +71,11 @@ void timestamps_kernel::record(const nvbench::cuda_stream &stream)
int num_sms = 0;
NVBENCH_CUDA_CALL(cudaGetDevice(&device_id));
NVBENCH_CUDA_CALL(
cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device_id));
NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device_id));
get_timestamps_kernel<<<static_cast<unsigned int>(num_sms), 1, 0, stream.get_stream()>>>(
m_device_timestamps,
m_device_timestamps + 1);
}
} // namespace nvbench
} // namespace nvbench::detail

View File

@@ -82,7 +82,7 @@ struct cartesian_product<nvbench::type_list<nvbench::type_list<T, Tail...>, TL,
using tail_prod = typename detail::cartesian_product<nvbench::type_list<TL, TLTail...>>::type;
using cur = typename detail::prepend_each<T, tail_prod>::type;
using next = typename detail::cartesian_product<
nvbench::type_list<nvbench::type_list<Tail...>, TL, TLTail...>>::type;
nvbench::type_list<nvbench::type_list<Tail...>, TL, TLTail...>>::type;
using type = decltype(detail::concat(cur{}, next{}));
};

View File

@@ -16,11 +16,10 @@
* limitations under the License.
*/
#include <nvbench/device_info.cuh>
#include <nvbench/config.cuh>
#include <nvbench/cuda_call.cuh>
#include <nvbench/detail/device_scope.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/internal/nvml.cuh>
#include <cuda_runtime_api.h>

View File

@@ -18,17 +18,16 @@
#pragma once
#include <cuda_runtime_api.h>
#include <nvbench/config.cuh>
#include <nvbench/cuda_call.cuh>
#include <nvbench/detail/device_scope.cuh>
#include <cuda_runtime_api.h>
#include <cstdint> // CHAR_BIT
#include <stdexcept>
#include <utility>
#include <string_view>
#include <utility>
// forward declare this for internal storage
struct nvmlDevice_st;

View File

@@ -16,13 +16,12 @@
* limitations under the License.
*/
#include <nvbench/device_manager.cuh>
#include <cuda_runtime_api.h>
#include <nvbench/cuda_call.cuh>
#include <nvbench/detail/device_scope.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/device_manager.cuh>
#include <cuda_runtime_api.h>
namespace nvbench
{
@@ -45,13 +44,13 @@ device_manager::device_manager()
}
}
const nvbench::device_info &device_manager::get_device(int id)
{
if (id < 0)
const nvbench::device_info &device_manager::get_device(int id)
{
if (id < 0)
{
NVBENCH_THROW(std::runtime_error, "Negative index: {}.", id);
}
return m_devices.at(static_cast<std::size_t>(id));
return m_devices.at(static_cast<std::size_t>(id));
}
} // namespace nvbench

View File

@@ -101,10 +101,10 @@ using no_gpu_t = tag<nvbench::detail::exec_flag::no_gpu>;
using no_batch_t = tag<nvbench::detail::exec_flag::no_batch>;
using modifier_mask_t = tag<nvbench::detail::exec_flag::modifier_mask>;
using hot_t = tag<nvbench::detail::exec_flag::hot>;
using cold_t = tag<nvbench::detail::exec_flag::cold>;
using cpu_only_t = tag<nvbench::detail::exec_flag::cpu_only>;
using measure_mask_t = tag<nvbench::detail::exec_flag::measure_mask>;
using hot_t = tag<nvbench::detail::exec_flag::hot>;
using cold_t = tag<nvbench::detail::exec_flag::cold>;
using cpu_only_t = tag<nvbench::detail::exec_flag::cpu_only>;
using measure_mask_t = tag<nvbench::detail::exec_flag::measure_mask>;
constexpr inline none_t none;
constexpr inline timer_t timer;

View File

@@ -19,7 +19,6 @@
#pragma once
#include <nvbench/axis_base.cuh>
#include <nvbench/types.cuh>
#include <vector>
@@ -40,7 +39,10 @@ struct float64_axis final : public axis_base
[[nodiscard]] nvbench::float64_t get_value(std::size_t i) const { return m_values[i]; }
private:
std::unique_ptr<axis_base> do_clone() const final { return std::make_unique<float64_axis>(*this); }
std::unique_ptr<axis_base> do_clone() const final
{
return std::make_unique<float64_axis>(*this);
}
std::size_t do_get_size() const final { return m_values.size(); }
std::string do_get_input_string(std::size_t i) const final;
std::string do_get_description(std::size_t i) const final;

View File

@@ -19,7 +19,6 @@
#pragma once
#include <nvbench/axis_base.cuh>
#include <nvbench/flags.cuh>
#include <nvbench/types.cuh>

View File

@@ -16,9 +16,8 @@
* limitations under the License.
*/
#include <nvbench/int64_axis.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/int64_axis.cuh>
#include <fmt/format.h>

View File

@@ -19,7 +19,6 @@
#pragma once
#include <nvbench/detail/transform_reduce.cuh>
#include <nvbench/internal/table_builder.cuh>
#include <fmt/color.h>

View File

@@ -21,12 +21,12 @@
#include <nvbench/config.cuh>
#include <nvbench/detail/throw.cuh>
#include <fmt/format.h>
#ifdef NVBENCH_HAS_NVML
#include <nvml.h>
#endif // NVBENCH_HAS_NVML
#include <fmt/format.h>
#include <stdexcept>
namespace nvbench::nvml
@@ -38,6 +38,7 @@ struct NVMLLifetimeManager
{
NVMLLifetimeManager();
~NVMLLifetimeManager();
private:
bool m_inited{false};
};

View File

@@ -16,24 +16,22 @@
* limitations under the License.
*/
#include <nvbench/json_printer.cuh>
#include <nvbench/axes_metadata.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/config.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/device_info.cuh>
#include <nvbench/device_manager.cuh>
#include <nvbench/git_revision.cuh>
#include <nvbench/json_printer.cuh>
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/version.cuh>
#include <nvbench/detail/throw.cuh>
#include <nlohmann/json.hpp>
#include <fmt/format.h>
#include <nlohmann/json.hpp>
#include <cstdint>
#include <fstream>
#include <iterator>
@@ -105,7 +103,7 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values)
default:
NVBENCH_THROW(std::runtime_error, "{}", "Unrecognized value type.");
} // end switch (value type)
} // end foreach value name
} // end foreach value name
}
} // end namespace
@@ -225,27 +223,26 @@ static void add_devices_section(nlohmann::ordered_json &root)
auto &devices = root["devices"];
for (const auto &dev_info : nvbench::device_manager::get().get_devices())
{
auto &device = devices.emplace_back();
device["id"] = dev_info.get_id();
device["name"] = dev_info.get_name();
device["sm_version"] = dev_info.get_sm_version();
device["ptx_version"] = dev_info.get_ptx_version();
device["sm_default_clock_rate"] = dev_info.get_sm_default_clock_rate();
device["number_of_sms"] = dev_info.get_number_of_sms();
device["max_blocks_per_sm"] = dev_info.get_max_blocks_per_sm();
device["max_threads_per_sm"] = dev_info.get_max_threads_per_sm();
device["max_threads_per_block"] = dev_info.get_max_threads_per_block();
device["registers_per_sm"] = dev_info.get_registers_per_sm();
device["registers_per_block"] = dev_info.get_registers_per_block();
device["global_memory_size"] = dev_info.get_global_memory_size();
device["global_memory_bus_peak_clock_rate"] =
dev_info.get_global_memory_bus_peak_clock_rate();
device["global_memory_bus_width"] = dev_info.get_global_memory_bus_width();
device["global_memory_bus_bandwidth"] = dev_info.get_global_memory_bus_bandwidth();
device["l2_cache_size"] = dev_info.get_l2_cache_size();
device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm();
device["shared_memory_per_block"] = dev_info.get_shared_memory_per_block();
device["ecc_state"] = dev_info.get_ecc_state();
auto &device = devices.emplace_back();
device["id"] = dev_info.get_id();
device["name"] = dev_info.get_name();
device["sm_version"] = dev_info.get_sm_version();
device["ptx_version"] = dev_info.get_ptx_version();
device["sm_default_clock_rate"] = dev_info.get_sm_default_clock_rate();
device["number_of_sms"] = dev_info.get_number_of_sms();
device["max_blocks_per_sm"] = dev_info.get_max_blocks_per_sm();
device["max_threads_per_sm"] = dev_info.get_max_threads_per_sm();
device["max_threads_per_block"] = dev_info.get_max_threads_per_block();
device["registers_per_sm"] = dev_info.get_registers_per_sm();
device["registers_per_block"] = dev_info.get_registers_per_block();
device["global_memory_size"] = dev_info.get_global_memory_size();
device["global_memory_bus_peak_clock_rate"] = dev_info.get_global_memory_bus_peak_clock_rate();
device["global_memory_bus_width"] = dev_info.get_global_memory_bus_width();
device["global_memory_bus_bandwidth"] = dev_info.get_global_memory_bus_bandwidth();
device["l2_cache_size"] = dev_info.get_l2_cache_size();
device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm();
device["shared_memory_per_block"] = dev_info.get_shared_memory_per_block();
device["ecc_state"] = dev_info.get_ecc_state();
}
}
@@ -298,8 +295,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
false;
#endif
} // "nvbench"
} // "version"
} // "meta"
} // "version"
} // "meta"
add_devices_section(root);
@@ -362,8 +359,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
default:
break;
} // end switch (axis type)
} // end foreach axis value
} // end foreach axis
} // end foreach axis value
} // end foreach axis
auto &states = bench["states"];
for (const auto &exec_state : bench_ptr->get_states())
@@ -431,8 +428,8 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches)
continue;
}
} // end foreach exec_state
} // end foreach benchmark
} // "benchmarks"
} // end foreach benchmark
} // "benchmarks"
m_ostream << root.dump(2) << "\n";
}
@@ -492,7 +489,7 @@ void json_printer::do_print_benchmark_list(const benchmark_vector &benches)
default:
break;
} // end switch (axis type)
} // end foreach axis value
} // end foreach axis value
}
} // end foreach bench

View File

@@ -19,7 +19,6 @@
#pragma once
#include <nvbench/printer_base.cuh>
#include <nvbench/types.cuh>
#include <string>

View File

@@ -16,15 +16,13 @@
* limitations under the License.
*/
#include <nvbench/markdown_printer.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/device_manager.cuh>
#include <nvbench/internal/markdown_table.cuh>
#include <nvbench/markdown_printer.cuh>
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/internal/markdown_table.cuh>
#include <fmt/color.h>
#include <fmt/format.h>
@@ -72,8 +70,12 @@ void markdown_printer::do_print_device_info()
"* Max Shared Memory: {} KiB/SM, {} KiB/Block\n",
device.get_shared_memory_per_sm() / 1024,
device.get_shared_memory_per_block() / 1024);
fmt::format_to(std::back_inserter(buffer), "* L2 Cache Size: {} KiB\n", device.get_l2_cache_size() / 1024);
fmt::format_to(std::back_inserter(buffer), "* Maximum Active Blocks: {}/SM\n", device.get_max_blocks_per_sm());
fmt::format_to(std::back_inserter(buffer),
"* L2 Cache Size: {} KiB\n",
device.get_l2_cache_size() / 1024);
fmt::format_to(std::back_inserter(buffer),
"* Maximum Active Blocks: {}/SM\n",
device.get_max_blocks_per_sm());
fmt::format_to(std::back_inserter(buffer),
"* Maximum Active Threads: {}/SM, {}/Block\n",
device.get_max_threads_per_sm(),
@@ -82,7 +84,9 @@ void markdown_printer::do_print_device_info()
"* Available Registers: {}/SM, {}/Block\n",
device.get_registers_per_sm(),
device.get_registers_per_block());
fmt::format_to(std::back_inserter(buffer), "* ECC Enabled: {}\n", device.get_ecc_state() ? "Yes" : "No");
fmt::format_to(std::back_inserter(buffer),
"* ECC Enabled: {}\n",
device.get_ecc_state() ? "Yes" : "No");
fmt::format_to(std::back_inserter(buffer), "\n");
}
m_ostream << fmt::to_string(buffer);
@@ -191,9 +195,12 @@ void markdown_printer::do_print_benchmark_list(const printer_base::benchmark_vec
{
desc = fmt::format(" ({})", desc);
}
fmt::format_to(std::back_inserter(buffer), " * `{}`{}\n", axis_ptr->get_input_string(i), desc);
fmt::format_to(std::back_inserter(buffer),
" * `{}`{}\n",
axis_ptr->get_input_string(i),
desc);
} // end foreach value
} // end foreach axis
} // end foreach axis
fmt::format_to(std::back_inserter(buffer), "\n");
} // end foreach bench

View File

@@ -16,10 +16,9 @@
* limitations under the License.
*/
#include <nvbench/named_values.cuh>
#include <nvbench/config.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/named_values.cuh>
#include <fmt/format.h>

View File

@@ -24,8 +24,8 @@
#include <nvbench/callable.cuh>
#include <nvbench/config.cuh>
#include <nvbench/cpu_timer.cuh>
#include <nvbench/criterion_manager.cuh>
#include <nvbench/create.cuh>
#include <nvbench/criterion_manager.cuh>
#include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_stream.cuh>
#include <nvbench/cuda_timer.cuh>

View File

@@ -34,6 +34,8 @@
#include <nvbench/internal/cli_help.cuh>
#include <nvbench/internal/cli_help_axis.cuh>
#include <fmt/format.h>
#include <algorithm>
#include <cassert>
#include <cstdlib>
@@ -44,12 +46,10 @@
#include <regex>
#include <stdexcept>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>
#include <fmt/format.h>
#include <string_view>
namespace
{

View File

@@ -191,9 +191,9 @@ protected:
virtual void do_process_bulk_data_float64(nvbench::state &,
const std::string &,
const std::string &,
const std::vector<nvbench::float64_t> &){};
const std::vector<nvbench::float64_t> &) {};
virtual void do_print_benchmark_list(const benchmark_vector &)
virtual void do_print_benchmark_list(const benchmark_vector &)
{
throw std::runtime_error{"nvbench::do_print_benchmark_list is not supported by this printer."};
}

View File

@@ -19,7 +19,6 @@
#pragma once
#include <nvbench/benchmark_base.cuh>
#include <nvbench/detail/state_generator.cuh>
#include <stdexcept>

View File

@@ -16,10 +16,9 @@
* limitations under the License.
*/
#include <nvbench/runner.cuh>
#include <nvbench/benchmark_base.cuh>
#include <nvbench/printer_base.cuh>
#include <nvbench/runner.cuh>
#include <nvbench/state.cuh>
#include <fmt/format.h>

View File

@@ -20,13 +20,13 @@
#include <nvbench/state.cuh>
#include <nvbench/types.cuh>
#include <fmt/color.h>
#include <fmt/format.h>
#include <algorithm>
#include <stdexcept>
#include <string>
#include <fmt/color.h>
#include <fmt/format.h>
namespace nvbench
{

View File

@@ -21,19 +21,21 @@
#include <nvbench/named_values.cuh>
#include <nvbench/types.cuh>
#include <string>
#include <initializer_list>
#include <string>
#include <unordered_map>
namespace nvbench
{
namespace detail
namespace detail
{
constexpr nvbench::float64_t compat_min_time() { return 0.5; } // 0.5 seconds
constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relative standard deviation
constexpr nvbench::float64_t compat_min_time() { return 0.5; } // 0.5 seconds
constexpr nvbench::float64_t compat_max_noise()
{
return 0.005;
} // 0.5% relative standard deviation
} // namespace detail
@@ -43,6 +45,7 @@ constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relati
class criterion_params
{
nvbench::named_values m_named_values;
public:
criterion_params();
criterion_params(std::initializer_list<std::pair<std::string, nvbench::named_values::value_type>>);
@@ -96,7 +99,7 @@ public:
*
* This method is called once per benchmark run, before any measurements are provided.
*/
void initialize(const criterion_params &params)
void initialize(const criterion_params &params)
{
m_params.set_from(params);
this->do_initialize();
@@ -105,18 +108,12 @@ public:
/**
* Add the latest measurement to the criterion
*/
void add_measurement(nvbench::float64_t measurement)
{
this->do_add_measurement(measurement);
}
void add_measurement(nvbench::float64_t measurement) { this->do_add_measurement(measurement); }
/**
* Check if the criterion has been met for all measurements processed by `add_measurement`
*/
bool is_finished()
{
return this->do_is_finished();
}
bool is_finished() { return this->do_is_finished(); }
protected:
/**

View File

@@ -16,10 +16,8 @@
* limitations under the License.
*/
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/stopping_criterion.cuh>
namespace nvbench
{
@@ -62,7 +60,7 @@ void criterion_params::set_from(const criterion_params &other)
void criterion_params::set_int64(std::string name, nvbench::int64_t value)
{
if (m_named_values.has_value(name))
if (m_named_values.has_value(name))
{
m_named_values.remove_value(name);
}
@@ -72,7 +70,7 @@ void criterion_params::set_int64(std::string name, nvbench::int64_t value)
void criterion_params::set_float64(std::string name, nvbench::float64_t value)
{
if (m_named_values.has_value(name))
if (m_named_values.has_value(name))
{
m_named_values.remove_value(name);
}
@@ -82,7 +80,7 @@ void criterion_params::set_float64(std::string name, nvbench::float64_t value)
void criterion_params::set_string(std::string name, std::string value)
{
if (m_named_values.has_value(name))
if (m_named_values.has_value(name))
{
m_named_values.remove_value(name);
}
@@ -110,15 +108,11 @@ std::string criterion_params::get_string(const std::string &name) const
return m_named_values.get_string(name);
}
std::vector<std::string> criterion_params::get_names() const
{
return m_named_values.get_names();
}
std::vector<std::string> criterion_params::get_names() const { return m_named_values.get_names(); }
nvbench::named_values::type criterion_params::get_type(const std::string &name) const
{
return m_named_values.get_type(name);
}
} // namespace nvbench::detail
} // namespace nvbench

View File

@@ -19,7 +19,6 @@
#pragma once
#include <nvbench/axis_base.cuh>
#include <nvbench/types.cuh>
#include <vector>

View File

@@ -19,7 +19,6 @@
#pragma once
#include <nvbench/axis_base.cuh>
#include <nvbench/type_list.cuh>
#include <nvbench/type_strings.cuh>

View File

@@ -16,9 +16,8 @@
* limitations under the License.
*/
#include <nvbench/type_axis.cuh>
#include <nvbench/detail/throw.cuh>
#include <nvbench/type_axis.cuh>
#include <fmt/format.h>
#include <fmt/ranges.h>

View File

@@ -18,7 +18,7 @@
#pragma once
#include "detail/type_list_impl.cuh"
#include <nvbench/detail/type_list_impl.cuh>
#include <tuple>
#include <type_traits>

View File

@@ -27,11 +27,11 @@
#endif
#ifdef NVBENCH_CXXABI_DEMANGLE
#include <cxxabi.h>
#include <cstdlib>
#include <memory>
#include <cxxabi.h>
namespace
{
struct free_wrapper

View File

@@ -5,12 +5,11 @@ import math
import os
import sys
from colorama import Fore
import tabulate
from colorama import Fore
from nvbench_json import reader
# Parse version string into tuple, "x.y.z" -> (x, y, z)
def version_tuple(v):
return tuple(map(int, (v.split("."))))
@@ -139,15 +138,14 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
colalign.append("center")
for device_id in device_ids:
rows = []
plot_data = {'cmp': {}, 'ref': {}, 'cmp_noise': {}, 'ref_noise': {}}
plot_data = {"cmp": {}, "ref": {}, "cmp_noise": {}, "ref_noise": {}}
for cmp_state in cmp_states:
cmp_state_name = cmp_state["name"]
ref_state = next(filter(lambda st: st["name"] == cmp_state_name,
ref_states),
None)
ref_state = next(
filter(lambda st: st["name"] == cmp_state_name, ref_states), None
)
if not ref_state:
continue
@@ -158,9 +156,7 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
row = []
for axis_value in axis_values:
axis_value_name = axis_value["name"]
row.append(format_axis_value(axis_value_name,
axis_value,
axes))
row.append(format_axis_value(axis_value_name, axis_value, axes))
cmp_summaries = cmp_state["summaries"]
ref_summaries = ref_state["summaries"]
@@ -171,23 +167,37 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
def lookup_summary(summaries, tag):
return next(filter(lambda s: s["tag"] == tag, summaries), None)
cmp_time_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/mean")
ref_time_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/mean")
cmp_noise_summary = lookup_summary(cmp_summaries, "nv/cold/time/gpu/stdev/relative")
ref_noise_summary = lookup_summary(ref_summaries, "nv/cold/time/gpu/stdev/relative")
cmp_time_summary = lookup_summary(
cmp_summaries, "nv/cold/time/gpu/mean"
)
ref_time_summary = lookup_summary(
ref_summaries, "nv/cold/time/gpu/mean"
)
cmp_noise_summary = lookup_summary(
cmp_summaries, "nv/cold/time/gpu/stdev/relative"
)
ref_noise_summary = lookup_summary(
ref_summaries, "nv/cold/time/gpu/stdev/relative"
)
# TODO: Use other timings, too. Maybe multiple rows, with a
# "Timing" column + values "CPU/GPU/Batch"?
if not all([cmp_time_summary,
ref_time_summary,
cmp_noise_summary,
ref_noise_summary]):
if not all(
[
cmp_time_summary,
ref_time_summary,
cmp_noise_summary,
ref_noise_summary,
]
):
continue
def extract_value(summary):
summary_data = summary["data"]
value_data = next(filter(lambda v: v["name"] == "value", summary_data))
assert(value_data["type"] == "float64")
value_data = next(
filter(lambda v: v["name"] == "value", summary_data)
)
assert value_data["type"] == "float64"
return value_data["value"]
cmp_time = extract_value(cmp_time_summary)
@@ -218,23 +228,27 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
if plot:
axis_name = []
axis_value = "--"
for aid in range(len(axis_values)):
for aid in range(len(axis_values)):
if axis_values[aid]["name"] != plot:
axis_name.append("{} = {}".format(axis_values[aid]["name"], axis_values[aid]["value"]))
axis_name.append(
"{} = {}".format(
axis_values[aid]["name"], axis_values[aid]["value"]
)
)
else:
axis_value = float(axis_values[aid]["value"])
axis_name = ', '.join(axis_name)
axis_value = float(axis_values[aid]["value"])
axis_name = ", ".join(axis_name)
if axis_name not in plot_data['cmp']:
plot_data['cmp'][axis_name] = {}
plot_data['ref'][axis_name] = {}
plot_data['cmp_noise'][axis_name] = {}
plot_data['ref_noise'][axis_name] = {}
if axis_name not in plot_data["cmp"]:
plot_data["cmp"][axis_name] = {}
plot_data["ref"][axis_name] = {}
plot_data["cmp_noise"][axis_name] = {}
plot_data["ref_noise"][axis_name] = {}
plot_data['cmp'][axis_name][axis_value] = cmp_time
plot_data['ref'][axis_name][axis_value] = ref_time
plot_data['cmp_noise'][axis_name][axis_value] = cmp_noise
plot_data['ref_noise'][axis_name][axis_value] = ref_noise
plot_data["cmp"][axis_name][axis_value] = cmp_time
plot_data["ref"][axis_name][axis_value] = ref_time
plot_data["cmp_noise"][axis_name][axis_value] = cmp_noise
plot_data["ref_noise"][axis_name][axis_value] = ref_noise
global config_count
global unknown_count
@@ -273,14 +287,13 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
print("## [%d] %s\n" % (device["id"], device["name"]))
# colalign and github format require tabulate 0.8.3
if tabulate_version >= (0, 8, 3):
print(tabulate.tabulate(rows,
headers=headers,
colalign=colalign,
tablefmt="github"))
print(
tabulate.tabulate(
rows, headers=headers, colalign=colalign, tablefmt="github"
)
)
else:
print(tabulate.tabulate(rows,
headers=headers,
tablefmt="markdown"))
print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown"))
print("")
@@ -295,18 +308,17 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
x = [float(x) for x in plot_data[key][axis].keys()]
y = list(plot_data[key][axis].values())
noise = list(plot_data[key + '_noise'][axis].values())
noise = list(plot_data[key + "_noise"][axis].values())
top = [y[i] + y[i] * noise[i] for i in range(len(x))]
bottom = [y[i] - y[i] * noise[i] for i in range(len(x))]
p = plt.plot(x, y, shape, marker='o', label=label)
p = plt.plot(x, y, shape, marker="o", label=label)
plt.fill_between(x, bottom, top, color=p[0].get_color(), alpha=0.1)
for axis in plot_data['cmp'].keys():
plot_line('cmp', '-', axis)
plot_line('ref', '--', axis + ' ref')
for axis in plot_data["cmp"].keys():
plot_line("cmp", "-", axis)
plot_line("ref", "--", axis + " ref")
plt.legend()
plt.show()
@@ -314,11 +326,17 @@ def compare_benches(ref_benches, cmp_benches, threshold, plot):
def main():
help_text = "%(prog)s [reference.json compare.json | reference_dir/ compare_dir/]"
parser = argparse.ArgumentParser(prog='nvbench_compare', usage=help_text)
parser.add_argument('--threshold-diff', type=float, dest='threshold', default=0.0,
help='only show benchmarks where percentage diff is >= THRESHOLD')
parser.add_argument('--plot-along', type=str, dest='plot', default=None,
help='plot results')
parser = argparse.ArgumentParser(prog="nvbench_compare", usage=help_text)
parser.add_argument(
"--threshold-diff",
type=float,
dest="threshold",
default=0.0,
help="only show benchmarks where percentage diff is >= THRESHOLD",
)
parser.add_argument(
"--plot-along", type=str, dest="plot", default=None, help="plot results"
)
args, files_or_dirs = parser.parse_known_args()
print(files_or_dirs)
@@ -336,14 +354,17 @@ def main():
continue
r = os.path.join(files_or_dirs[0], f)
c = os.path.join(files_or_dirs[1], f)
if os.path.isfile(r) and os.path.isfile(c) and \
os.path.getsize(r) > 0 and os.path.getsize(c) > 0:
if (
os.path.isfile(r)
and os.path.isfile(c)
and os.path.getsize(r) > 0
and os.path.getsize(c) > 0
):
to_compare.append((r, c))
else:
to_compare = [(files_or_dirs[0], files_or_dirs[1])]
for ref, comp in to_compare:
ref_root = reader.read_file(ref)
cmp_root = reader.read_file(comp)
@@ -355,7 +376,9 @@ def main():
print("Device sections do not match.")
sys.exit(1)
compare_benches(ref_root["benchmarks"], cmp_root["benchmarks"], args.threshold, args.plot)
compare_benches(
ref_root["benchmarks"], cmp_root["benchmarks"], args.threshold, args.plot
)
print("# Summary\n")
print("- Total Matches: %d" % config_count)
@@ -365,5 +388,5 @@ def main():
return failure_count
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main())

View File

@@ -1,19 +1,19 @@
#!/usr/bin/env python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import argparse
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from nvbench_json import reader
def parse_files():
help_text = "%(prog)s [nvbench.out.json | dir/] ..."
parser = argparse.ArgumentParser(prog='nvbench_histogram', usage=help_text)
parser = argparse.ArgumentParser(prog="nvbench_histogram", usage=help_text)
args, files_or_dirs = parser.parse_known_args()
@@ -41,14 +41,14 @@ def parse_files():
def extract_filename(summary):
summary_data = summary["data"]
value_data = next(filter(lambda v: v["name"] == "filename", summary_data))
assert(value_data["type"] == "string")
assert value_data["type"] == "string"
return value_data["value"]
def extract_size(summary):
summary_data = summary["data"]
value_data = next(filter(lambda v: v["name"] == "size", summary_data))
assert(value_data["type"] == "int64")
assert value_data["type"] == "int64"
return int(value_data["value"])
@@ -57,9 +57,10 @@ def parse_samples_meta(filename, state):
if not summaries:
return None, None
summary = next(filter(lambda s: s["tag"] == "nv/json/bin:nv/cold/sample_times",
summaries),
None)
summary = next(
filter(lambda s: s["tag"] == "nv/json/bin:nv/cold/sample_times", summaries),
None,
)
if not summary:
return None, None
@@ -81,7 +82,7 @@ def parse_samples(filename, state):
with open(samples_filename, "rb") as f:
samples = np.fromfile(f, "<f4")
assert (sample_count == len(samples))
assert sample_count == len(samples)
return samples
@@ -118,5 +119,5 @@ def main():
plt.show()
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main())

View File

@@ -1,2 +1,3 @@
from . import reader
from . import version
from . import reader, version
__all__ = ["reader", "version"]

View File

@@ -1,8 +1,8 @@
file_version = (1, 0, 0)
file_version_string = "{}.{}.{}".format(file_version[0],
file_version[1],
file_version[2])
file_version_string = "{}.{}.{}".format(
file_version[0], file_version[1], file_version[2]
)
def check_file_version(filename, root_node):
@@ -19,8 +19,14 @@ def check_file_version(filename, root_node):
# for now just warn on mismatch.
if version_node["string"] != file_version_string:
print("WARNING:")
print(" {} was written using a different NVBench JSON file version."
.format(filename))
print(
" {} was written using a different NVBench JSON file version.".format(
filename
)
)
print(" It may not read correctly.")
print(" (file version: {} reader version: {})"
.format(version_node["string"], file_version_string))
print(
" (file version: {} reader version: {})".format(
version_node["string"], file_version_string
)
)

View File

@@ -5,9 +5,8 @@ import math
import os
import sys
from nvbench_json import reader
import tabulate
from nvbench_json import reader
# Parse version string into tuple, "x.y.z" -> (x, y, z)
@@ -39,7 +38,8 @@ def format_walltime(seconds_in):
"{:0>2d}:".format(h) if h > 1e-9 else "",
"{:0>2d}:".format(m) if (h > 1e-9 or m > 1e-9) else "",
"{:0>2d}.".format(s) if (h > 1e-9 or m > 1e-9) else "{:d}.".format(s),
"{:0>3d}".format(ms))
"{:0>3d}".format(ms),
)
def format_percentage(percentage):
@@ -58,7 +58,7 @@ measure_column_names = {"cold": "Isolated", "batch": "Batch", "cupti": "CUPTI"}
def init_measures():
out = {}
for name in measure_names:
out[name] = 0.
out[name] = 0.0
return out
@@ -67,17 +67,17 @@ def get_measures(state):
times = {}
for name in measure_names:
measure_walltime_tag = "nv/{}/walltime".format(name)
summary = next(filter(lambda s: s["tag"] == measure_walltime_tag,
summaries),
None)
summary = next(
filter(lambda s: s["tag"] == measure_walltime_tag, summaries), None
)
if not summary:
continue
walltime_data = next(filter(lambda d: d["name"] == "value", summary["data"]))
assert(walltime_data["type"] == "float64")
assert walltime_data["type"] == "float64"
walltime = walltime_data["value"]
walltime = float(walltime)
times[name] = walltime if walltime else 0.
times[name] = walltime if walltime else 0.0
return times
@@ -87,7 +87,7 @@ def merge_measures(target, src):
def sum_measures(measures):
total_time = 0.
total_time = 0.0
for time in measures.values():
total_time += time
return total_time
@@ -194,20 +194,21 @@ def print_overview_section(data):
# colalign and github format require tabulate 0.8.3
if tabulate_version >= (0, 8, 3):
print(tabulate.tabulate(rows,
headers=headers,
colalign=colalign,
tablefmt="github"))
print(
tabulate.tabulate(
rows, headers=headers, colalign=colalign, tablefmt="github"
)
)
else:
print(tabulate.tabulate(rows,
headers=headers,
tablefmt="markdown"))
print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown"))
print()
# append_data_row_lambda args: (row_list, name, items[name])
def print_measures_table(headers, colalign, items, total_measures, append_item_row_lambda):
def print_measures_table(
headers, colalign, items, total_measures, append_item_row_lambda
):
total_time = sum_measures(total_measures)
active_measures = get_active_measure_names(total_measures)
num_user_columns = len(headers)
@@ -248,14 +249,13 @@ def print_measures_table(headers, colalign, items, total_measures, append_item_r
# colalign and github format require tabulate 0.8.3
if tabulate_version >= (0, 8, 3):
print(tabulate.tabulate(rows,
headers=headers,
colalign=colalign,
tablefmt="github"))
print(
tabulate.tabulate(
rows, headers=headers, colalign=colalign, tablefmt="github"
)
)
else:
print(tabulate.tabulate(rows,
headers=headers,
tablefmt="markdown"))
print(tabulate.tabulate(rows, headers=headers, tablefmt="markdown"))
def print_files_section(data):
@@ -319,7 +319,7 @@ def print_bench_section(bench_name, bench):
def main():
help_text = "%(prog)s [nvbench.out.json | dir/]..."
parser = argparse.ArgumentParser(prog='nvbench_walltime', usage=help_text)
parser = argparse.ArgumentParser(prog="nvbench_walltime", usage=help_text)
args, files_or_dirs = parser.parse_known_args()
@@ -353,5 +353,5 @@ def main():
print_files_section(data)
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main())

View File

@@ -17,22 +17,19 @@
*/
#include <nvbench/axes_metadata.cuh>
#include <nvbench/type_list.cuh>
#include <nvbench/type_strings.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h>
#include <algorithm>
#include <string_view>
using int_list = nvbench::type_list<nvbench::int8_t,
nvbench::int16_t,
nvbench::int32_t,
nvbench::int64_t>;
#include "test_asserts.cuh"
using int_list =
nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;
using float_list = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>;
@@ -110,7 +107,6 @@ void test_default_type_axes_names()
ASSERT(axes.get_type_axis(4).get_name() == "T4");
ASSERT(axes.get_type_axis(4).get_axis_index() == 4);
}
}
void test_type_axes()
@@ -138,8 +134,7 @@ void test_type_axes()
fmt::format_to(std::back_inserter(buffer),
" - {}{}\n",
input_string,
description.empty() ? ""
: fmt::format(" ({})", description));
description.empty() ? "" : fmt::format(" ({})", description));
}
}
@@ -157,9 +152,8 @@ Axis: Other
)expected";
const std::string test = fmt::to_string(buffer);
const auto diff =
std::mismatch(ref.cbegin(), ref.cend(), test.cbegin(), test.cend());
const auto idx = static_cast<std::size_t>(diff.second - test.cbegin());
const auto diff = std::mismatch(ref.cbegin(), ref.cend(), test.cbegin(), test.cend());
const auto idx = static_cast<std::size_t>(diff.second - test.cbegin());
ASSERT_MSG(test == ref,
"Differs at character {}.\n"
"Expected:\n\"{}\"\n\n"
@@ -189,9 +183,7 @@ void test_float64_axes()
void test_int64_axes()
{
nvbench::axes_metadata axes;
axes.add_int64_axis("I64 Axis",
{10, 11, 12, 13, 14},
nvbench::int64_axis_flags::none);
axes.add_int64_axis("I64 Axis", {10, 11, 12, 13, 14}, nvbench::int64_axis_flags::none);
ASSERT(axes.get_axes().size() == 1);
const auto &axis = axes.get_int64_axis("I64 Axis");
ASSERT(axis.get_size() == 5);
@@ -205,9 +197,7 @@ void test_int64_axes()
void test_int64_power_of_two_axes()
{
nvbench::axes_metadata axes;
axes.add_int64_axis("I64 POT Axis",
{1, 2, 3, 4, 5},
nvbench::int64_axis_flags::power_of_two);
axes.add_int64_axis("I64 POT Axis", {1, 2, 3, 4, 5}, nvbench::int64_axis_flags::power_of_two);
ASSERT(axes.get_axes().size() == 1);
const auto &axis = axes.get_int64_axis("I64 POT Axis");
ASSERT(axis.get_size() == 5);

View File

@@ -17,7 +17,6 @@
*/
#include <nvbench/benchmark.cuh>
#include <nvbench/callable.cuh>
#include <nvbench/named_values.cuh>
#include <nvbench/state.cuh>
@@ -25,8 +24,6 @@
#include <nvbench/type_strings.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h>
#include <algorithm>
@@ -34,6 +31,8 @@
#include <variant>
#include <vector>
#include "test_asserts.cuh"
template <typename T>
std::vector<T> sort(std::vector<T> &&vec)
{
@@ -61,34 +60,26 @@ void no_op_generator(nvbench::state &state)
NVBENCH_DEFINE_CALLABLE(no_op_generator, no_op_callable);
template <typename Integer, typename Float, typename Other>
void template_no_op_generator(nvbench::state &state,
nvbench::type_list<Integer, Float, Other>)
void template_no_op_generator(nvbench::state &state, nvbench::type_list<Integer, Float, Other>)
{
ASSERT(nvbench::type_strings<Integer>::input_string() ==
state.get_string("Integer"));
ASSERT(nvbench::type_strings<Float>::input_string() ==
state.get_string("Float"));
ASSERT(nvbench::type_strings<Other>::input_string() ==
state.get_string("Other"));
ASSERT(nvbench::type_strings<Integer>::input_string() == state.get_string("Integer"));
ASSERT(nvbench::type_strings<Float>::input_string() == state.get_string("Float"));
ASSERT(nvbench::type_strings<Other>::input_string() == state.get_string("Other"));
// Enum params using non-templated version:
no_op_generator(state);
}
NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator,
template_no_op_callable);
NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, template_no_op_callable);
using int_list = nvbench::type_list<nvbench::int8_t,
nvbench::int16_t,
nvbench::int32_t,
nvbench::int64_t>;
using int_list =
nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;
using float_list = nvbench::type_list<nvbench::float32_t, nvbench::float64_t>;
using misc_list = nvbench::type_list<bool, void>;
using lots_of_types_bench =
nvbench::benchmark<template_no_op_callable,
nvbench::type_list<int_list, float_list, misc_list>>;
nvbench::benchmark<template_no_op_callable, nvbench::type_list<int_list, float_list, misc_list>>;
using no_types_bench = nvbench::benchmark<no_op_callable>;
@@ -110,8 +101,7 @@ void test_type_axes()
fmt::format_to(std::back_inserter(buffer),
" - {}{}\n",
input_string,
description.empty() ? ""
: fmt::format(" ({})", description));
description.empty() ? "" : fmt::format(" ({})", description));
}
}
@@ -300,9 +290,7 @@ void test_get_config_count()
auto const num_devices = bench.get_devices().size();
ASSERT_MSG(bench.get_config_count() == 72 * num_devices,
"Got {}",
bench.get_config_count());
ASSERT_MSG(bench.get_config_count() == 72 * num_devices, "Got {}", bench.get_config_count());
}
int main()

View File

@@ -18,11 +18,11 @@
#include <nvbench/cpu_timer.cuh>
#include "test_asserts.cuh"
#include <chrono>
#include <thread>
#include "test_asserts.cuh"
void test_basic()
{
using namespace std::literals::chrono_literals;

View File

@@ -16,17 +16,14 @@
* limitations under the License.
*/
#include <nvbench/create.cuh>
#include <nvbench/benchmark.cuh>
#include <nvbench/callable.cuh>
#include <nvbench/create.cuh>
#include <nvbench/state.cuh>
#include <nvbench/type_list.cuh>
#include <nvbench/type_strings.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h>
#include <algorithm>
@@ -34,6 +31,8 @@
#include <variant>
#include <vector>
#include "test_asserts.cuh"
template <typename T>
std::vector<T> sort(std::vector<T> &&vec)
{
@@ -72,15 +71,11 @@ using misc_types = nvbench::type_list<bool, void>;
using type_axes = nvbench::type_list<float_types, int_types, misc_types>;
template <typename FloatT, typename IntT, typename MiscT>
void template_no_op_generator(nvbench::state &state,
nvbench::type_list<FloatT, IntT, MiscT>)
void template_no_op_generator(nvbench::state &state, nvbench::type_list<FloatT, IntT, MiscT>)
{
ASSERT(nvbench::type_strings<FloatT>::input_string() ==
state.get_string("FloatT"));
ASSERT(nvbench::type_strings<IntT>::input_string() ==
state.get_string("IntT"));
ASSERT(nvbench::type_strings<IntT>::input_string() ==
state.get_string("IntT"));
ASSERT(nvbench::type_strings<FloatT>::input_string() == state.get_string("FloatT"));
ASSERT(nvbench::type_strings<IntT>::input_string() == state.get_string("IntT"));
ASSERT(nvbench::type_strings<IntT>::input_string() == state.get_string("IntT"));
// Enum params using non-templated version:
no_op_generator(state);
@@ -116,8 +111,7 @@ std::string run_and_get_state_string(nvbench::benchmark_base &bench,
void validate_default_name()
{
auto bench =
nvbench::benchmark_manager::get().get_benchmark("no_op_generator").clone();
auto bench = nvbench::benchmark_manager::get().get_benchmark("no_op_generator").clone();
const std::string ref = "Params:\n";
@@ -127,8 +121,7 @@ void validate_default_name()
void validate_custom_name()
{
auto bench =
nvbench::benchmark_manager::get().get_benchmark("Custom Name").clone();
auto bench = nvbench::benchmark_manager::get().get_benchmark("Custom Name").clone();
const std::string ref = "Params:\n";
@@ -138,8 +131,7 @@ void validate_custom_name()
void validate_no_types()
{
auto bench =
nvbench::benchmark_manager::get().get_benchmark("No Types").clone();
auto bench = nvbench::benchmark_manager::get().get_benchmark("No Types").clone();
const std::string ref = R"expected(Params: Float: 11 Int: 1 String: One
Params: Float: 11 Int: 2 String: One
@@ -176,8 +168,7 @@ Params: Float: 13 Int: 3 String: Three
void validate_only_types()
{
auto bench =
nvbench::benchmark_manager::get().get_benchmark("Oops, All Types!").clone();
auto bench = nvbench::benchmark_manager::get().get_benchmark("Oops, All Types!").clone();
const std::string ref = R"expected(Params: FloatT: F32 IntT: I32 MiscT: bool
Params: FloatT: F32 IntT: I32 MiscT: void
@@ -195,8 +186,7 @@ Params: FloatT: F64 IntT: I64 MiscT: void
void validate_all_axes()
{
auto bench =
nvbench::benchmark_manager::get().get_benchmark("All The Axes").clone();
auto bench = nvbench::benchmark_manager::get().get_benchmark("All The Axes").clone();
const std::string ref =
R"expected(Params: Float: 11 FloatT: F32 Int: 1 IntT: I32 MiscT: bool String: One

View File

@@ -42,27 +42,34 @@ protected:
void test_no_duplicates_are_allowed()
{
nvbench::criterion_manager& manager = nvbench::criterion_manager::get();
bool exception_triggered = false;
nvbench::criterion_manager &manager = nvbench::criterion_manager::get();
bool exception_triggered = false;
try {
[[maybe_unused]] nvbench::stopping_criterion_base& _ = manager.get_criterion("custom");
} catch(...) {
try
{
[[maybe_unused]] nvbench::stopping_criterion_base &_ = manager.get_criterion("custom");
}
catch (...)
{
exception_triggered = true;
}
ASSERT(exception_triggered);
std::unique_ptr<custom_criterion> custom_ptr = std::make_unique<custom_criterion>();
custom_criterion* custom_raw = custom_ptr.get();
custom_criterion *custom_raw = custom_ptr.get();
ASSERT(&manager.add(std::move(custom_ptr)) == custom_raw);
nvbench::stopping_criterion_base& custom = nvbench::criterion_manager::get().get_criterion("custom");
nvbench::stopping_criterion_base &custom =
nvbench::criterion_manager::get().get_criterion("custom");
ASSERT(custom_raw == &custom);
exception_triggered = false;
try {
try
{
manager.add(std::make_unique<custom_criterion>());
} catch(...) {
}
catch (...)
{
exception_triggered = true;
}
ASSERT(exception_triggered);

View File

@@ -60,4 +60,3 @@ int main()
test_compat_overwrite();
test_overwrite();
}

View File

@@ -16,19 +16,16 @@
* limitations under the License.
*/
#include <nvbench/cuda_timer.cuh>
#include <nvbench/cuda_stream.cuh>
#include <nvbench/cuda_timer.cuh>
#include <nvbench/test_kernels.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h>
void test_basic(cudaStream_t time_stream,
cudaStream_t exec_stream,
bool expected)
#include "test_asserts.cuh"
void test_basic(cudaStream_t time_stream, cudaStream_t exec_stream, bool expected)
{
nvbench::cuda_timer timer;

View File

@@ -16,8 +16,8 @@
* limitations under the License.
*/
#include <nvbench/cuda_call.cuh>
#include <nvbench/nvbench.cuh>
#include "nvbench/cuda_call.cuh"
/******************************************************************************
* Install custom parser.
@@ -35,7 +35,7 @@
// User code to handle a specific argument:
void handle_my_custom_arg();
// NVBench hook for modiifying the command line arguments before parsing:
// NVBench hook for modifying the command line arguments before parsing:
void custom_arg_handler(std::vector<std::string> &args)
{
// Handle and remove "--my-custom-arg"

View File

@@ -19,8 +19,8 @@
#include <nvbench/nvbench.cuh>
#include <algorithm>
#include <cstdlib>
#include <cstdio>
#include <cstdlib>
/******************************************************************************
* Test having global state that is initialized and finalized via RAII.

View File

@@ -29,12 +29,10 @@
void noisy_bench(nvbench::state &state)
{
// time, convert ms -> s
const auto mean = static_cast<nvbench::float32_t>(state.get_float64("Mean")) /
1000.f;
const auto mean = static_cast<nvbench::float32_t>(state.get_float64("Mean")) / 1000.f;
// rel stdev
const auto noise_pct =
static_cast<nvbench::float32_t>(state.get_float64("Noise"));
const auto noise = noise_pct / 100.f;
const auto noise_pct = static_cast<nvbench::float32_t>(state.get_float64("Noise"));
const auto noise = noise_pct / 100.f;
// abs stdev
const auto stdev = noise * mean;
@@ -53,8 +51,7 @@ void noisy_bench(nvbench::state &state)
try
{
return static_cast<nvbench::float32_t>(
state.get_summary("nv/cold/time/gpu/stdev/relative")
.get_float64("value"));
state.get_summary("nv/cold/time/gpu/stdev/relative").get_float64("value"));
}
catch (std::invalid_argument &)
{

View File

@@ -20,11 +20,11 @@
#include <nvbench/stopping_criterion.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <vector>
#include <random>
#include <numeric>
#include <random>
#include <vector>
#include "test_asserts.cuh"
void test_const()
{
@@ -32,7 +32,7 @@ void test_const()
nvbench::detail::entropy_criterion criterion;
criterion.initialize(params);
for (int i = 0; i < 6; i++)
for (int i = 0; i < 6; i++)
{ // nvbench wants at least 5 to compute the standard deviation
criterion.add_measurement(42.0);
}
@@ -48,7 +48,7 @@ void produce_entropy_arch(nvbench::detail::entropy_criterion &criterion)
* 2.5, 2.4, 2.2, 2.1, 2.0, 1.9 <-+
* 1.8, 1.7, 1.6, 1.6, 1.5, 1.4 |
* 1.4, 1.3, 1.3, 1.3, 1.2, 1.2 |
* 1.1, 1.1, 1.1, 1.0, 1.0, 1.0 +-- entropy only decreases after 5-th sample,
* 1.1, 1.1, 1.1, 1.0, 1.0, 1.0 +-- entropy only decreases after 5-th sample,
* 1.0, 0.9, 0.9, 0.9, 0.9, 0.9 | so the slope should be negative
* 0.8, 0.8, 0.8, 0.8, 0.8, 0.8 |
* 0.7, 0.7, 0.7, 0.7, 0.7, 0.7 <-+

View File

@@ -18,12 +18,12 @@
#include <nvbench/enum_type_list.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h>
#include <type_traits>
#include "test_asserts.cuh"
// If using gcc version < 7, disable some tests to WAR a compiler bug. See NVIDIA/nvbench#39.
#if defined(__GNUC__) && __GNUC__ == 7
#define USING_GCC_7
@@ -102,8 +102,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
void test_int()
{
ASSERT((std::is_same_v<nvbench::enum_type_list<>, nvbench::type_list<>>));
ASSERT((std::is_same_v<nvbench::enum_type_list<0>,
nvbench::type_list<nvbench::enum_type<0>>>));
ASSERT((std::is_same_v<nvbench::enum_type_list<0>, nvbench::type_list<nvbench::enum_type<0>>>));
ASSERT((std::is_same_v<nvbench::enum_type_list<0, 1, 2, 3, 4>,
nvbench::type_list<nvbench::enum_type<0>,
nvbench::enum_type<1>,
@@ -115,42 +114,35 @@ void test_int()
void test_scoped_enum()
{
#ifndef USING_GCC_7
ASSERT((
std::is_same_v<nvbench::enum_type_list<scoped_enum::val_1>,
nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>>>));
ASSERT((std::is_same_v<nvbench::enum_type_list<scoped_enum::val_1>,
nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>>>));
#endif
ASSERT((
std::is_same_v<nvbench::enum_type_list<scoped_enum::val_1,
scoped_enum::val_2,
scoped_enum::val_3>,
nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>,
nvbench::enum_type<scoped_enum::val_2>,
nvbench::enum_type<scoped_enum::val_3>>>));
ASSERT((std::is_same_v<
nvbench::enum_type_list<scoped_enum::val_1, scoped_enum::val_2, scoped_enum::val_3>,
nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>,
nvbench::enum_type<scoped_enum::val_2>,
nvbench::enum_type<scoped_enum::val_3>>>));
}
void test_unscoped_enum()
{
#ifndef USING_GCC_7
ASSERT(
(std::is_same_v<nvbench::enum_type_list<unscoped_val_1>,
nvbench::type_list<nvbench::enum_type<unscoped_val_1>>>));
ASSERT(
(std::is_same_v<
nvbench::enum_type_list<unscoped_val_1, unscoped_val_2, unscoped_val_3>,
nvbench::type_list<nvbench::enum_type<unscoped_val_1>,
nvbench::enum_type<unscoped_val_2>,
nvbench::enum_type<unscoped_val_3>>>));
ASSERT((std::is_same_v<nvbench::enum_type_list<unscoped_val_1>,
nvbench::type_list<nvbench::enum_type<unscoped_val_1>>>));
ASSERT((std::is_same_v<nvbench::enum_type_list<unscoped_val_1, unscoped_val_2, unscoped_val_3>,
nvbench::type_list<nvbench::enum_type<unscoped_val_1>,
nvbench::enum_type<unscoped_val_2>,
nvbench::enum_type<unscoped_val_3>>>));
#endif
}
void test_scoped_enum_type_strings()
{
using values = nvbench::enum_type_list<scoped_enum::val_1,
scoped_enum::val_2,
scoped_enum::val_3>;
using val_1 = nvbench::tl::get<0, values>;
using val_2 = nvbench::tl::get<1, values>;
using val_3 = nvbench::tl::get<2, values>;
using values =
nvbench::enum_type_list<scoped_enum::val_1, scoped_enum::val_2, scoped_enum::val_3>;
using val_1 = nvbench::tl::get<0, values>;
using val_2 = nvbench::tl::get<1, values>;
using val_3 = nvbench::tl::get<2, values>;
ASSERT((nvbench::type_strings<val_1>::input_string() == "1"));
ASSERT((nvbench::type_strings<val_1>::description() == "scoped_enum::val_1"));
ASSERT((nvbench::type_strings<val_2>::input_string() == "2"));

View File

@@ -34,8 +34,7 @@ void test_empty()
const auto clone_base = axis.clone();
ASSERT(clone_base.get() != nullptr);
const auto *clone =
dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
const auto *clone = dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
ASSERT(clone != nullptr);
ASSERT(clone->get_name() == "Empty");
@@ -62,8 +61,7 @@ void test_basic()
const auto clone_base = axis.clone();
ASSERT(clone_base.get() != nullptr);
const auto *clone =
dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
const auto *clone = dynamic_cast<const nvbench::float64_axis *>(clone_base.get());
ASSERT(clone != nullptr);
ASSERT(clone->get_name() == "Basic");

View File

@@ -18,10 +18,10 @@
#include <nvbench/int64_axis.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h>
#include "test_asserts.cuh"
void test_empty()
{
nvbench::int64_axis axis("Empty");
@@ -36,8 +36,7 @@ void test_empty()
const auto clone_base = axis.clone();
ASSERT(clone_base.get() != nullptr);
const auto *clone =
dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
const auto *clone = dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
ASSERT(clone != nullptr);
ASSERT(clone->get_name() == "Empty");
@@ -66,8 +65,7 @@ void test_basic()
const auto clone_base = axis.clone();
ASSERT(clone_base.get() != nullptr);
const auto *clone =
dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
const auto *clone = dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
ASSERT(clone != nullptr);
ASSERT(clone->get_name() == "BasicAxis");
@@ -87,8 +85,7 @@ void test_basic()
void test_power_of_two()
{
nvbench::int64_axis axis{"POTAxis"};
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4},
nvbench::int64_axis_flags::power_of_two);
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two);
const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4};
const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16};
@@ -102,14 +99,12 @@ void test_power_of_two()
for (size_t i = 0; i < 8; ++i)
{
ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i]));
ASSERT(axis.get_description(i) ==
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
}
const auto clone_base = axis.clone();
ASSERT(clone_base.get() != nullptr);
const auto *clone =
dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
const auto *clone = dynamic_cast<const nvbench::int64_axis *>(clone_base.get());
ASSERT(clone != nullptr);
ASSERT(clone->get_name() == "POTAxis");
@@ -122,8 +117,7 @@ void test_power_of_two()
for (size_t i = 0; i < 8; ++i)
{
ASSERT(clone->get_input_string(i) == fmt::to_string(ref_inputs[i]));
ASSERT(clone->get_description(i) ==
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
ASSERT(clone->get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
}
}
@@ -250,8 +244,7 @@ void test_update_none_to_pow2()
void test_update_pow2_to_none()
{
nvbench::int64_axis axis{"TestAxis"};
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4},
nvbench::int64_axis_flags::power_of_two);
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two);
const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4};
const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16};
@@ -304,8 +297,7 @@ void test_update_pow2_to_none()
for (size_t i = 0; i < 8; ++i)
{
ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i]));
ASSERT(axis.get_description(i) ==
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
}
}
@@ -313,8 +305,7 @@ void test_update_pow2_to_pow2()
{
nvbench::int64_axis axis{"TestAxis"};
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4},
nvbench::int64_axis_flags::power_of_two);
axis.set_inputs({0, 1, 2, 3, 7, 6, 5, 4}, nvbench::int64_axis_flags::power_of_two);
const std::vector<nvbench::int64_t> ref_inputs{0, 1, 2, 3, 7, 6, 5, 4};
const std::vector<nvbench::int64_t> ref_values{1, 2, 4, 8, 128, 64, 32, 16};
@@ -369,8 +360,7 @@ void test_update_pow2_to_pow2()
for (size_t i = 0; i < 8; ++i)
{
ASSERT(axis.get_input_string(i) == fmt::to_string(ref_inputs[i]));
ASSERT(axis.get_description(i) ==
fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
ASSERT(axis.get_description(i) == fmt::format("2^{} = {}", ref_inputs[i], ref_values[i]));
}
}

View File

@@ -18,10 +18,10 @@
#include <nvbench/named_values.cuh>
#include "test_asserts.cuh"
#include <algorithm>
#include "test_asserts.cuh"
void test_empty()
{
nvbench::named_values vals;

View File

@@ -16,15 +16,14 @@
* limitations under the License.
*/
#include <nvbench/option_parser.cuh>
#include <nvbench/create.cuh>
#include <nvbench/option_parser.cuh>
#include <nvbench/type_list.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h>
#include "test_asserts.cuh"
//==============================================================================
// Declare a couple benchmarks for testing:
void DummyBench(nvbench::state &state) { state.skip("Skipping for testing."); }
@@ -50,8 +49,7 @@ NVBENCH_BENCH_TYPES(TestBench, NVBENCH_TYPE_AXES(Ts, Us))
namespace
{
[[nodiscard]] std::string
states_to_string(const std::vector<nvbench::state> &states)
[[nodiscard]] std::string states_to_string(const std::vector<nvbench::state> &states)
{
fmt::memory_buffer buffer;
std::string table_format = "| {:^5} | {:^10} | {:^4} | {:^4} | {:^4} "
@@ -88,7 +86,7 @@ states_to_string(const std::vector<nvbench::state> &states)
// Expects the parser to have a single TestBench benchmark. Runs the benchmark
// and returns the resulting states.
[[nodiscard]] const auto& parser_to_states(nvbench::option_parser &parser)
[[nodiscard]] const auto &parser_to_states(nvbench::option_parser &parser)
{
const auto &benches = parser.get_benchmarks();
ASSERT(benches.size() == 1);
@@ -267,8 +265,7 @@ void test_int64_axis_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 2 : 1 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 2 : 1 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -308,8 +305,7 @@ void test_int64_axis_multi()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 , 7 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 , 7 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -323,8 +319,7 @@ void test_int64_axis_multi()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 7 : 5 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ ] = [ 2 : 7 : 5 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -369,8 +364,7 @@ void test_int64_axis_pow2_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -384,8 +378,7 @@ void test_int64_axis_pow2_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 : 7 : 1 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 7 : 7 : 1 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -425,8 +418,7 @@ void test_int64_axis_pow2_multi()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 , 7 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 , 7 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -440,8 +432,7 @@ void test_int64_axis_pow2_multi()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 : 7 : 5 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ pow2 ] = [ 2 : 7 : 5 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -486,8 +477,7 @@ void test_int64_axis_none_to_pow2_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -501,8 +491,7 @@ void test_int64_axis_none_to_pow2_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 : 7 : 1 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 7 : 7 : 1 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -542,8 +531,7 @@ void test_int64_axis_none_to_pow2_multi()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 , 7 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 , 7 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -557,8 +545,7 @@ void test_int64_axis_none_to_pow2_multi()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 : 7 : 5 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Ints [ pow2 ] = [ 2 : 7 : 5 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -617,8 +604,7 @@ void test_int64_axis_pow2_to_none_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 2 : 1 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 2 : 1 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -658,8 +644,7 @@ void test_int64_axis_pow2_to_none_multi()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 , 7 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 , 7 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -673,8 +658,7 @@ void test_int64_axis_pow2_to_none_multi()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 7 : 5 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " PO2s [ ] = [ 2 : 7 : 5 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -719,8 +703,7 @@ void test_float64_axis_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -734,10 +717,7 @@ void test_float64_axis_single()
{
nvbench::option_parser parser;
parser.parse({"--benchmark",
"TestBench",
"--axis",
" Floats [ ] = [ 3.5 : 3.6 : 1 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 : 3.6 : 1 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -777,8 +757,7 @@ void test_float64_axis_multi()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 , 4.1 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 , 4.1 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -792,18 +771,14 @@ void test_float64_axis_multi()
{
nvbench::option_parser parser;
parser.parse({"--benchmark",
"TestBench",
"--axis",
" Floats [ ] = [ 3.5 : 4.2 : 0.6 ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Floats [ ] = [ 3.5 : 4.2 : 0.6 ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", "Floats=[3.5:4.2:0.6]"});
parser.parse({"--benchmark", "TestBench", "--axis", "Floats=[3.5:4.2:0.6]"});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -827,8 +802,7 @@ void test_string_axis_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Strings [ ] = fo br "});
parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = fo br "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -842,8 +816,7 @@ void test_string_axis_single()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -883,8 +856,7 @@ void test_string_axis_multi()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br , baz ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " Strings [ ] = [ fo br , baz ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -951,8 +923,7 @@ void test_type_axis_multi()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "TestBench", "--axis", " T [ ] = [ U8, void ] "});
parser.parse({"--benchmark", "TestBench", "--axis", " T [ ] = [ U8, void ] "});
const auto test = parser_to_state_string(parser);
ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test);
}
@@ -1177,9 +1148,8 @@ void test_axis_before_benchmark()
void test_min_samples()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "DummyBench", "--min-samples", "12345"});
const auto& states = parser_to_states(parser);
parser.parse({"--benchmark", "DummyBench", "--min-samples", "12345"});
const auto &states = parser_to_states(parser);
ASSERT(states.size() == 1);
ASSERT(states[0].get_min_samples() == 12345);
@@ -1188,9 +1158,8 @@ void test_min_samples()
void test_min_time()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "DummyBench", "--min-time", "12345e2"});
const auto& states = parser_to_states(parser);
parser.parse({"--benchmark", "DummyBench", "--min-time", "12345e2"});
const auto &states = parser_to_states(parser);
ASSERT(states.size() == 1);
ASSERT(std::abs(states[0].get_min_time() - 12345e2) < 1.);
@@ -1199,9 +1168,8 @@ void test_min_time()
void test_max_noise()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "DummyBench", "--max-noise", "50.3"});
const auto& states = parser_to_states(parser);
parser.parse({"--benchmark", "DummyBench", "--max-noise", "50.3"});
const auto &states = parser_to_states(parser);
ASSERT(states.size() == 1);
ASSERT(std::abs(states[0].get_max_noise() - 0.503) < 1.e-4);
@@ -1210,9 +1178,8 @@ void test_max_noise()
void test_skip_time()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "DummyBench", "--skip-time", "12345e2"});
const auto& states = parser_to_states(parser);
parser.parse({"--benchmark", "DummyBench", "--skip-time", "12345e2"});
const auto &states = parser_to_states(parser);
ASSERT(states.size() == 1);
ASSERT(std::abs(states[0].get_skip_time() - 12345e2) < 1.);
@@ -1221,9 +1188,8 @@ void test_skip_time()
void test_timeout()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "DummyBench", "--timeout", "12345e2"});
const auto& states = parser_to_states(parser);
parser.parse({"--benchmark", "DummyBench", "--timeout", "12345e2"});
const auto &states = parser_to_states(parser);
ASSERT(states.size() == 1);
ASSERT(std::abs(states[0].get_timeout() - 12345e2) < 1.);
@@ -1232,12 +1198,15 @@ void test_timeout()
void test_stopping_criterion()
{
nvbench::option_parser parser;
parser.parse(
{"--benchmark", "DummyBench",
"--stopping-criterion", "entropy",
"--max-angle", "0.42",
"--min-r2", "0.6"});
const auto& states = parser_to_states(parser);
parser.parse({"--benchmark",
"DummyBench",
"--stopping-criterion",
"entropy",
"--max-angle",
"0.42",
"--min-r2",
"0.6"});
const auto &states = parser_to_states(parser);
ASSERT(states.size() == 1);
ASSERT(states[0].get_stopping_criterion() == "entropy");

View File

@@ -22,12 +22,9 @@
void test_basic()
{
ASSERT((nvbench::range(0, 6) ==
std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6}));
ASSERT((nvbench::range(0, 6, 1) ==
std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6}));
ASSERT(
(nvbench::range(0, 6, 2) == std::vector<nvbench::int64_t>{0, 2, 4, 6}));
ASSERT((nvbench::range(0, 6) == std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6}));
ASSERT((nvbench::range(0, 6, 1) == std::vector<nvbench::int64_t>{0, 1, 2, 3, 4, 5, 6}));
ASSERT((nvbench::range(0, 6, 2) == std::vector<nvbench::int64_t>{0, 2, 4, 6}));
ASSERT((nvbench::range(0, 6, 3) == std::vector<nvbench::int64_t>{0, 3, 6}));
ASSERT((nvbench::range(0, 6, 4) == std::vector<nvbench::int64_t>{0, 4}));
ASSERT((nvbench::range(0, 6, 5) == std::vector<nvbench::int64_t>{0, 5}));
@@ -37,26 +34,19 @@ void test_basic()
void test_result_type()
{
// All ints should turn into int64 by default:
ASSERT((std::is_same_v<decltype(nvbench::range(0ll, 1ll)),
std::vector<nvbench::int64_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range(0, 1)),
std::vector<nvbench::int64_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range(0u, 1u)),
std::vector<nvbench::int64_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range(0ll, 1ll)), std::vector<nvbench::int64_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range(0, 1)), std::vector<nvbench::int64_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range(0u, 1u)), std::vector<nvbench::int64_t>>));
// All floats should turn into float64 by default:
ASSERT((std::is_same_v<decltype(nvbench::range(0., 1.)),
std::vector<nvbench::float64_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range(0.f, 1.f)),
std::vector<nvbench::float64_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range(0., 1.)), std::vector<nvbench::float64_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range(0.f, 1.f)), std::vector<nvbench::float64_t>>));
// Other types may be explicitly specified:
ASSERT((std::is_same_v<decltype(nvbench::range<nvbench::float32_t,
nvbench::float32_t>(0.f, 1.f)),
ASSERT((std::is_same_v<decltype(nvbench::range<nvbench::float32_t, nvbench::float32_t>(0.f, 1.f)),
std::vector<nvbench::float32_t>>));
ASSERT((std::is_same_v<
decltype(nvbench::range<nvbench::int32_t, nvbench::int32_t>(0, 1)),
std::vector<nvbench::int32_t>>));
ASSERT((std::is_same_v<decltype(nvbench::range<nvbench::int32_t, nvbench::int32_t>(0, 1)),
std::vector<nvbench::int32_t>>));
}
void test_fp_tolerance()
@@ -68,10 +58,8 @@ void test_fp_tolerance()
const nvbench::float32_t stride = 1e-4f;
for (std::size_t size = 1; size < 1024; ++size)
{
const nvbench::float32_t end =
start + stride * static_cast<nvbench::float32_t>(size - 1);
ASSERT_MSG(nvbench::range(start, end, stride).size() == size,
"size={}", size);
const nvbench::float32_t end = start + stride * static_cast<nvbench::float32_t>(size - 1);
ASSERT_MSG(nvbench::range(start, end, stride).size() == size, "size={}", size);
}
}

View File

@@ -2,18 +2,17 @@
#include "test_asserts.cuh"
namespace
{
__global__ void multiply5(const int32_t* __restrict__ a, int32_t* __restrict__ b)
{
const auto id = blockIdx.x * blockDim.x + threadIdx.x;
b[id] = 5 * a[id];
}
__global__ void multiply5(const int32_t *__restrict__ a, int32_t *__restrict__ b)
{
const auto id = blockIdx.x * blockDim.x + threadIdx.x;
b[id] = 5 * a[id];
}
} // namespace
int main()
{
{
multiply5<<<256, 256>>>(nullptr, nullptr);
try

View File

@@ -18,14 +18,13 @@
#include <nvbench/detail/ring_buffer.cuh>
#include "test_asserts.cuh"
#include <algorithm>
#include <vector>
#include "test_asserts.cuh"
template <typename T>
bool equal(const nvbench::detail::ring_buffer<T> &buffer,
const std::vector<T> &reference)
bool equal(const nvbench::detail::ring_buffer<T> &buffer, const std::vector<T> &reference)
{
return std::equal(buffer.begin(), buffer.end(), reference.begin());
}

View File

@@ -16,23 +16,22 @@
* limitations under the License.
*/
#include <nvbench/runner.cuh>
#include <nvbench/benchmark.cuh>
#include <nvbench/callable.cuh>
#include <nvbench/runner.cuh>
#include <nvbench/state.cuh>
#include <nvbench/type_list.cuh>
#include <nvbench/type_strings.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h>
#include <algorithm>
#include <variant>
#include <vector>
#include "test_asserts.cuh"
template <typename T>
std::vector<T> sort(std::vector<T> &&vec)
{
@@ -65,21 +64,16 @@ using misc_types = nvbench::type_list<bool, void>;
using type_axes = nvbench::type_list<float_types, int_types, misc_types>;
template <typename FloatT, typename IntT, typename MiscT>
void template_no_op_generator(nvbench::state &state,
nvbench::type_list<FloatT, IntT, MiscT>)
void template_no_op_generator(nvbench::state &state, nvbench::type_list<FloatT, IntT, MiscT>)
{
ASSERT(nvbench::type_strings<FloatT>::input_string() ==
state.get_string("FloatT"));
ASSERT(nvbench::type_strings<IntT>::input_string() ==
state.get_string("IntT"));
ASSERT(nvbench::type_strings<IntT>::input_string() ==
state.get_string("IntT"));
ASSERT(nvbench::type_strings<FloatT>::input_string() == state.get_string("FloatT"));
ASSERT(nvbench::type_strings<IntT>::input_string() == state.get_string("IntT"));
ASSERT(nvbench::type_strings<IntT>::input_string() == state.get_string("IntT"));
// Enum params using non-templated version:
no_op_generator(state);
}
NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator,
template_no_op_callable);
NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, template_no_op_callable);
void test_empty()
{

View File

@@ -16,10 +16,9 @@
* limitations under the License.
*/
#include <nvbench/state.cuh>
#include <nvbench/benchmark.cuh>
#include <nvbench/callable.cuh>
#include <nvbench/state.cuh>
#include <nvbench/summary.cuh>
#include <nvbench/types.cuh>
@@ -43,8 +42,7 @@ struct state_tester : public nvbench::state
void set_param(std::string name, T &&value)
{
this->state::m_axis_values.set_value(std::move(name),
nvbench::named_values::value_type{
std::forward<T>(value)});
nvbench::named_values::value_type{std::forward<T>(value)});
}
};
} // namespace nvbench::detail

Some files were not shown because too many files have changed in this diff Show More