diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index 2a40eae..76b67a1 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -2,12 +2,14 @@ set(srcs axes_metadata.cu axis_base.cu benchmark_base.cu + cuda_call.cu float64_axis.cu int64_axis.cu params.cu + state.cu string_axis.cu type_axis.cu - state.cu + detail/state_generator.cu ) diff --git a/nvbench/cuda_call.cu b/nvbench/cuda_call.cu new file mode 100644 index 0000000..19bce24 --- /dev/null +++ b/nvbench/cuda_call.cu @@ -0,0 +1,30 @@ +#include + +#include +#include + +#include + +namespace nvbench +{ + +namespace cuda_call +{ + +void throw_error(const std::string &filename, + std::size_t lineno, + const std::string &command, + cudaError_t error_code) +{ + throw std::runtime_error(fmt::format("{}:{}: Cuda API call returned error: " + "{}: {}\nCommand: '{}'", + filename, + lineno, + cudaGetErrorName(error_code), + cudaGetErrorString(error_code), + command)); +} + +} // namespace cuda_call + +} // namespace nvbench diff --git a/nvbench/cuda_call.cuh b/nvbench/cuda_call.cuh new file mode 100644 index 0000000..42792f0 --- /dev/null +++ b/nvbench/cuda_call.cuh @@ -0,0 +1,31 @@ +#pragma once + +#include + +#include + +#define NVBENCH_CUDA_CALL(call) \ + do \ + { \ + const cudaError_t nvbench_cuda_call_error = call; \ + if (nvbench_cuda_call_error != cudaSuccess) \ + { \ + nvbench::cuda_call::throw_error(__FILE__, \ + __LINE__, \ + #call, \ + nvbench_cuda_call_error); \ + } \ + } while (false) + +namespace nvbench +{ +namespace cuda_call +{ + +void throw_error(const std::string &filename, + std::size_t lineno, + const std::string &call, + cudaError_t error); + +} // namespace cuda_call +} // namespace nvbench diff --git a/nvbench/cuda_stream.cuh b/nvbench/cuda_stream.cuh new file mode 100644 index 0000000..e66b27d --- /dev/null +++ b/nvbench/cuda_stream.cuh @@ -0,0 +1,26 @@ +#include + +#include + +namespace nvbench +{ + +// RAII wrapper for a cudaStream_t. +struct cuda_stream +{ + cuda_stream() { NVBENCH_CUDA_CALL(cudaStreamCreate(&m_stream)); } + ~cuda_stream() { NVBENCH_CUDA_CALL(cudaStreamDestroy(m_stream)); } + + // move-only + cuda_stream(const cuda_stream &) = delete; + cuda_stream(cuda_stream &&) = default; + cuda_stream &operator=(const cuda_stream &) = delete; + cuda_stream &operator=(cuda_stream &&) = default; + + operator cudaStream_t() { return m_stream; } + +private: + cudaStream_t m_stream; +}; + +} // namespace nvbench diff --git a/nvbench/cuda_timer.cuh b/nvbench/cuda_timer.cuh new file mode 100644 index 0000000..52b92ca --- /dev/null +++ b/nvbench/cuda_timer.cuh @@ -0,0 +1,65 @@ +#pragma once + +#include + +#include + +namespace nvbench +{ + +struct cuda_timer +{ + cuda_timer() + { + NVBENCH_CUDA_CALL(cudaEventCreate(&m_start)); + NVBENCH_CUDA_CALL(cudaEventCreate(&m_stop)); + } + + ~cuda_timer() + { + NVBENCH_CUDA_CALL(cudaEventDestroy(m_start)); + NVBENCH_CUDA_CALL(cudaEventDestroy(m_stop)); + } + + // move-only + cuda_timer(const cuda_timer &) = delete; + cuda_timer(cuda_timer &&) = default; + cuda_timer &operator=(const cuda_timer &) = delete; + cuda_timer &operator=(cuda_timer &&) = default; + + void start(cudaStream_t stream) + { + NVBENCH_CUDA_CALL(cudaEventRecord(m_start, stream)); + } + + void stop(cudaStream_t stream) + { + NVBENCH_CUDA_CALL(cudaEventRecord(m_stop, stream)); + } + + bool ready() const + { + const cudaError_t state = cudaEventQuery(m_stop); + if (state == cudaErrorNotReady) + { + return false; + } + NVBENCH_CUDA_CALL(state); + return true; + } + + double get_duration() const + { + NVBENCH_CUDA_CALL(cudaEventSynchronize(m_stop)); + float elapsed_time; + // According to docs, this is in ms with a resolution of ~0.5 microseconds. + NVBENCH_CUDA_CALL(cudaEventElapsedTime(&elapsed_time, m_start, m_stop)); + return elapsed_time / 1000.0; + } + +private: + cudaEvent_t m_start; + cudaEvent_t m_stop; +}; + +} // namespace nvbench diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index fb01968..7e9218d 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -1,8 +1,9 @@ set(test_srcs axes_metadata.cu benchmark.cu - int64_axis.cu + cuda_timer.cu float64_axis.cu + int64_axis.cu params.cu state.cu state_generator.cu diff --git a/testing/cuda_timer.cu b/testing/cuda_timer.cu new file mode 100644 index 0000000..751bcd8 --- /dev/null +++ b/testing/cuda_timer.cu @@ -0,0 +1,58 @@ +#include + +#include +#include + +#include "test_asserts.cuh" + +#include + +#include + +#include + +__global__ void sleep_kernel(double seconds) +{ + const auto start = cuda::std::chrono::high_resolution_clock::now(); + const auto ns = cuda::std::chrono::nanoseconds( + static_cast(seconds * 1000 * 1000 * 1000)); + const auto finish = start + ns; + + auto now = cuda::std::chrono::high_resolution_clock::now(); + while (now < finish) + { + now = cuda::std::chrono::high_resolution_clock::now(); + } +} + +void test_basic(cudaStream_t time_stream, + cudaStream_t exec_stream, + bool expected) +{ + nvbench::cuda_timer timer; + + NVBENCH_CUDA_CALL(cudaDeviceSynchronize()); + + timer.start(time_stream); + sleep_kernel<<<1, 1, 0, exec_stream>>>(0.25); + timer.stop(time_stream); + + NVBENCH_CUDA_CALL(cudaDeviceSynchronize()); + const bool captured = timer.get_duration() > 0.25; + ASSERT_MSG(captured == expected, + fmt::format("Unexpected result from timer: {} seconds " + " (expected {})", + timer.get_duration(), + (expected ? "> 0.25s" : "< 0.25s"))); +} + +void test_basic() +{ + nvbench::cuda_stream stream1; + nvbench::cuda_stream stream2; + + test_basic(stream1, stream1, true); + test_basic(stream1, stream2, false); +} + +int main() { test_basic(); }