Files
nvbench/examples/custom_criterion.cu
Allison Piper e8c8877d36 Squashed commit of the following:
commit 4b309e6ad8
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Sat Apr 6 13:19:14 2024 +0000

    Minor cleanups

commit 476ed2ceae
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Sat Apr 6 12:53:37 2024 +0000

    WAR compiler ice in nlohmann json.

    Only seeing this on GCC 9 + CTK 11.1. Seems to be
    having trouble with the `[[no_unique_address]]` optimization.

commit a9bf1d3e42
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Sat Apr 6 00:24:47 2024 +0000

    Bump nlohmann json.

commit 80980fe373
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Sat Apr 6 00:22:07 2024 +0000

    Fix llvm filesystem support

commit f6099e6311
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 23:18:44 2024 +0000

    Drop MSVC 2017 testing.

commit 5ae50a8ef5
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 23:02:32 2024 +0000

    Add mroe missing headers.

commit b2a9ae04d9
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 22:37:56 2024 +0000

    Remove old CUDA+MSVC builds and make windows build-only.

commit 5b18c26a28
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 22:37:07 2024 +0000

    Fix header for std::min/max.

    Why do I always think it's utility instead of algorithm....

commit 6a409efa2d
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 22:18:18 2024 +0000

    Temporarily disable CUPTI on all windows builds.

commit f432f88866
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 21:42:52 2024 +0000

    Fix warnings on MSVC.

commit 829787649b
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 21:03:16 2024 +0000

    More flailing about in powershell.

commit 21742e6bea
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 20:36:08 2024 +0000

    Cleanup filesystem header handling.

commit de3d202635
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 20:09:00 2024 +0000

    Windows CI debugging.

commit a4151667ff
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 19:45:40 2024 +0000

    Quotation mark madness

commit dd04f3befe
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 19:27:27 2024 +0000

    Temporarily disable NVML on windows CI until new containers are ready.

commit f3952848c4
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 19:25:22 2024 +0000

    WAR issues on gcc-7.

commit 198986875e
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 19:25:04 2024 +0000

    More matrix/devcontainer updates.

commit b9712f8696
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 18:30:35 2024 +0000

    Fix windows build scripts.

commit 943f268280
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 18:18:33 2024 +0000

    Fix warnings with clang host compiler.

commit 7063e1d60a
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 18:14:28 2024 +0000

    More devcontainer hijinks.

commit 06532fde81
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 17:51:25 2024 +0000

    More matrix updates.

commit 78a265ea55
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 17:34:00 2024 +0000

    Support CLI CMake options for windows ci scripts.

commit 670895c867
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 17:31:59 2024 +0000

    Add missing devcontainers.

commit b121823e74
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 17:22:54 2024 +0000

    Build for `all-major` architectures in presets.

    We can get away with this because we require CMake 3.23.1.
    This was added in 3.23.

commit fccfd44685
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 17:22:08 2024 +0000

    Update matrix file.

commit e7d43ba90e
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 16:23:48 2024 +0000

    Consolidate build/test jobs.

commit c4044056ec
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 16:04:11 2024 +0000

    Add missing build script.
2024-04-06 13:56:10 +00:00

82 lines
2.6 KiB
Plaintext

/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/nvbench.cuh>
// Grab some testing kernels from NVBench:
#include <nvbench/test_kernels.cuh>
// Thrust vectors simplify memory management:
#include <thrust/device_vector.h>
// Inherit from the stopping_criterion_base class:
class fixed_criterion final : public nvbench::stopping_criterion_base
{
nvbench::int64_t m_num_samples{};
public:
fixed_criterion()
: nvbench::stopping_criterion_base{"fixed", {{"max-samples", nvbench::int64_t{42}}}}
{}
protected:
// Setup the criterion in the `do_initialize()` method:
virtual void do_initialize() override
{
m_num_samples = 0;
}
// Process new measurements in the `add_measurement()` method:
virtual void do_add_measurement(nvbench::float64_t /* measurement */) override
{
m_num_samples++;
}
// Check if the stopping criterion is met in the `is_finished()` method:
virtual bool do_is_finished() override
{
return m_num_samples >= m_params.get_int64("max-samples");
}
};
// Register the criterion with NVBench:
NVBENCH_REGISTER_CRITERION(fixed_criterion);
void throughput_bench(nvbench::state &state)
{
// Allocate input data:
const std::size_t num_values = 64 * 1024 * 1024 / sizeof(nvbench::int32_t);
thrust::device_vector<nvbench::int32_t> input(num_values);
thrust::device_vector<nvbench::int32_t> output(num_values);
// Provide throughput information:
state.add_element_count(num_values, "NumElements");
state.add_global_memory_reads<nvbench::int32_t>(num_values, "DataSize");
state.add_global_memory_writes<nvbench::int32_t>(num_values);
state.exec(nvbench::exec_tag::no_batch, [&input, &output, num_values](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()),
num_values);
});
}
NVBENCH_BENCH(throughput_bench).set_stopping_criterion("fixed");