Add cuda_timer, cuda_stream.

This commit is contained in:
Allison Vacanti
2020-12-29 23:29:02 -05:00
parent 981031e732
commit b07ffafff4
7 changed files with 215 additions and 2 deletions

View File

@@ -2,12 +2,14 @@ set(srcs
axes_metadata.cu
axis_base.cu
benchmark_base.cu
cuda_call.cu
float64_axis.cu
int64_axis.cu
params.cu
state.cu
string_axis.cu
type_axis.cu
state.cu
detail/state_generator.cu
)

30
nvbench/cuda_call.cu Normal file
View File

@@ -0,0 +1,30 @@
#include <nvbench/cuda_call.cuh>
#include <stdexcept>
#include <string>
#include <fmt/format.h>
namespace nvbench
{
namespace cuda_call
{
void throw_error(const std::string &filename,
std::size_t lineno,
const std::string &command,
cudaError_t error_code)
{
throw std::runtime_error(fmt::format("{}:{}: Cuda API call returned error: "
"{}: {}\nCommand: '{}'",
filename,
lineno,
cudaGetErrorName(error_code),
cudaGetErrorString(error_code),
command));
}
} // namespace cuda_call
} // namespace nvbench

31
nvbench/cuda_call.cuh Normal file
View File

@@ -0,0 +1,31 @@
#pragma once
#include <cuda_runtime_api.h>
#include <string>
#define NVBENCH_CUDA_CALL(call) \
do \
{ \
const cudaError_t nvbench_cuda_call_error = call; \
if (nvbench_cuda_call_error != cudaSuccess) \
{ \
nvbench::cuda_call::throw_error(__FILE__, \
__LINE__, \
#call, \
nvbench_cuda_call_error); \
} \
} while (false)
namespace nvbench
{
namespace cuda_call
{
void throw_error(const std::string &filename,
std::size_t lineno,
const std::string &call,
cudaError_t error);
} // namespace cuda_call
} // namespace nvbench

26
nvbench/cuda_stream.cuh Normal file
View File

@@ -0,0 +1,26 @@
#include <nvbench/cuda_call.cuh>
#include <cuda_runtime_api.h>
namespace nvbench
{
// RAII wrapper for a cudaStream_t.
struct cuda_stream
{
cuda_stream() { NVBENCH_CUDA_CALL(cudaStreamCreate(&m_stream)); }
~cuda_stream() { NVBENCH_CUDA_CALL(cudaStreamDestroy(m_stream)); }
// move-only
cuda_stream(const cuda_stream &) = delete;
cuda_stream(cuda_stream &&) = default;
cuda_stream &operator=(const cuda_stream &) = delete;
cuda_stream &operator=(cuda_stream &&) = default;
operator cudaStream_t() { return m_stream; }
private:
cudaStream_t m_stream;
};
} // namespace nvbench

65
nvbench/cuda_timer.cuh Normal file
View File

@@ -0,0 +1,65 @@
#pragma once
#include <nvbench/cuda_call.cuh>
#include <cuda_runtime_api.h>
namespace nvbench
{
struct cuda_timer
{
cuda_timer()
{
NVBENCH_CUDA_CALL(cudaEventCreate(&m_start));
NVBENCH_CUDA_CALL(cudaEventCreate(&m_stop));
}
~cuda_timer()
{
NVBENCH_CUDA_CALL(cudaEventDestroy(m_start));
NVBENCH_CUDA_CALL(cudaEventDestroy(m_stop));
}
// move-only
cuda_timer(const cuda_timer &) = delete;
cuda_timer(cuda_timer &&) = default;
cuda_timer &operator=(const cuda_timer &) = delete;
cuda_timer &operator=(cuda_timer &&) = default;
void start(cudaStream_t stream)
{
NVBENCH_CUDA_CALL(cudaEventRecord(m_start, stream));
}
void stop(cudaStream_t stream)
{
NVBENCH_CUDA_CALL(cudaEventRecord(m_stop, stream));
}
bool ready() const
{
const cudaError_t state = cudaEventQuery(m_stop);
if (state == cudaErrorNotReady)
{
return false;
}
NVBENCH_CUDA_CALL(state);
return true;
}
double get_duration() const
{
NVBENCH_CUDA_CALL(cudaEventSynchronize(m_stop));
float elapsed_time;
// According to docs, this is in ms with a resolution of ~0.5 microseconds.
NVBENCH_CUDA_CALL(cudaEventElapsedTime(&elapsed_time, m_start, m_stop));
return elapsed_time / 1000.0;
}
private:
cudaEvent_t m_start;
cudaEvent_t m_stop;
};
} // namespace nvbench

View File

@@ -1,8 +1,9 @@
set(test_srcs
axes_metadata.cu
benchmark.cu
int64_axis.cu
cuda_timer.cu
float64_axis.cu
int64_axis.cu
params.cu
state.cu
state_generator.cu

58
testing/cuda_timer.cu Normal file
View File

@@ -0,0 +1,58 @@
#include <nvbench/cuda_timer.cuh>
#include <nvbench/cuda_stream.cuh>
#include <nvbench/types.cuh>
#include "test_asserts.cuh"
#include <fmt/format.h>
#include <cuda/std/chrono>
#include <cuda_runtime.h>
__global__ void sleep_kernel(double seconds)
{
const auto start = cuda::std::chrono::high_resolution_clock::now();
const auto ns = cuda::std::chrono::nanoseconds(
static_cast<nvbench::int64_t>(seconds * 1000 * 1000 * 1000));
const auto finish = start + ns;
auto now = cuda::std::chrono::high_resolution_clock::now();
while (now < finish)
{
now = cuda::std::chrono::high_resolution_clock::now();
}
}
void test_basic(cudaStream_t time_stream,
cudaStream_t exec_stream,
bool expected)
{
nvbench::cuda_timer timer;
NVBENCH_CUDA_CALL(cudaDeviceSynchronize());
timer.start(time_stream);
sleep_kernel<<<1, 1, 0, exec_stream>>>(0.25);
timer.stop(time_stream);
NVBENCH_CUDA_CALL(cudaDeviceSynchronize());
const bool captured = timer.get_duration() > 0.25;
ASSERT_MSG(captured == expected,
fmt::format("Unexpected result from timer: {} seconds "
" (expected {})",
timer.get_duration(),
(expected ? "> 0.25s" : "< 0.25s")));
}
void test_basic()
{
nvbench::cuda_stream stream1;
nvbench::cuda_stream stream2;
test_basic(stream1, stream1, true);
test_basic(stream1, stream2, false);
}
int main() { test_basic(); }