Files
nvbench/nvbench/test_kernels.cuh
Allison Vacanti 3fc75f5ea6 Add more examples.
- exec_tag_timer
- exec_tag_sync
- skip
- throughput
2021-03-09 16:03:14 -05:00

83 lines
2.0 KiB
Plaintext

/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cuda/std/chrono>
#include <cuda_runtime.h>
/*!
* @file test_kernels.cuh
* A collection of simple kernels for testing purposes.
*
* Note that these kernels are written to be short and simple, not performant.
*/
namespace nvbench
{

/*!
 * Busy-waits on the device until `seconds` have elapsed.
 *
 * Every launched thread spins independently, polling
 * `cuda::std::chrono::high_resolution_clock`. No memory is read or written
 * and no launch configuration is required; any grid/block shape works.
 *
 * @param seconds Time to spin, in seconds. The value is converted to a whole
 * number of nanoseconds (fractions are truncated). For a zero or negative
 * value the deadline is not in the future, so the loop is expected to exit on
 * its first check.
 */
__global__ void sleep_kernel(double seconds)
{
  using clock       = cuda::std::chrono::high_resolution_clock;
  using nanoseconds = cuda::std::chrono::nanoseconds;

  const auto start = clock::now();

  // Cast through the duration's own representation type instead of
  // `nvbench::int64_t`: nothing this header includes declares that alias, so
  // relying on it made the header compile only after other nvbench headers.
  const auto ns =
    nanoseconds(static_cast<nanoseconds::rep>(seconds * 1000 * 1000 * 1000));
  const auto finish = start + ns;

  while (clock::now() < finish)
  {
    // Spin until the deadline passes.
  }
}

/*!
 * Naive copy of `n` values from `in` -> `out`, converting each element from
 * `T` to `U` with `static_cast`.
 *
 * Each thread starts at its global x-index and advances by the total number
 * of threads along x, so any 1D launch configuration covers all of `[0, n)`,
 * including `n == 0` and values of `n` that are not a multiple of the block
 * size. Only the x components of the grid/block dimensions are used.
 *
 * @param in  Source array of at least `n` elements, readable from the device.
 * @param out Destination array of at least `n` elements, writable from the
 *            device.
 * @param n   Number of elements to copy.
 */
template <typename T, typename U>
__global__ void copy_kernel(const T* in, U* out, std::size_t n)
{
  // The built-in index variables are 32-bit unsigned. Widen before multiplying
  // so the products cannot wrap, and keep the loop counter as wide as `n`: a
  // 32-bit counter can never reach an `n` of 2^32 or more and would wrap
  // around instead of terminating.
  const std::size_t first =
    static_cast<std::size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const std::size_t stride = static_cast<std::size_t>(blockDim.x) * gridDim.x;

  for (std::size_t i = first; i < n; i += stride)
  {
    out[i] = static_cast<U>(in[i]);
  }
}

/*!
 * For `i <- [0,n)`, `out[i] = in[i] % 2`, with the result converted to `U`
 * via `static_cast`.
 *
 * `%` is applied to `T` as-is, so for built-in signed integer types a negative
 * odd input produces `-1`, not `1`.
 *
 * Each thread starts at its global x-index and advances by the total number
 * of threads along x, so any 1D launch configuration covers all of `[0, n)`.
 * Only the x components of the grid/block dimensions are used.
 *
 * @param in  Source array of at least `n` elements, readable from the device.
 * @param out Destination array of at least `n` elements, writable from the
 *            device.
 * @param n   Number of elements to process.
 */
template <typename T, typename U>
__global__ void mod2_kernel(const T* in, U* out, std::size_t n)
{
  // Widen the 32-bit built-in indices before multiplying, and use a counter
  // as wide as `n`, so that large launches and `n >= 2^32` neither wrap the
  // index nor prevent the loop from terminating.
  const std::size_t first =
    static_cast<std::size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const std::size_t stride = static_cast<std::size_t>(blockDim.x) * gridDim.x;

  for (std::size_t i = first; i < n; i += stride)
  {
    out[i] = static_cast<U>(in[i] % 2);
  }
}

} // namespace nvbench