Added cudaGetLastError() calls to reset benchmarking kernel errors (issue 88). (#173)

* Create and use NVBENCH_CUDA_CALL_RESET_ERROR.
* Moved cudaGetLastError() call to NVBENCH_CUDA_CALL macro

---------

Co-authored-by: Sergey Pavlov <psvvsp89@gmail.com>
2026-04-20 14:58:54 +00:00 · 2024-05-31 19:32:01 +04:00
parent 088c9ee658
commit a171514056
3 changed files with 33 additions and 0 deletions
--- a/nvbench/cuda_call.cuh
+++ b/nvbench/cuda_call.cuh
@@ -24,12 +24,14 @@
 #include <string>

 /// Throws a std::runtime_error if `call` doesn't return `cudaSuccess`.
+/// On failure, resets the error with cudaGetLastError() before throwing.
 #define NVBENCH_CUDA_CALL(call)                                                                    \
  do                                                                                               \
  {                                                                                                \
    const cudaError_t nvbench_cuda_call_error = call;                                              \
    if (nvbench_cuda_call_error != cudaSuccess)                                                    \
    {                                                                                              \
+      cudaGetLastError();                                                                          \
      nvbench::cuda_call::throw_error(__FILE__, __LINE__, #call, nvbench_cuda_call_error);         \
    }                                                                                              \
  } while (false)
--- a/testing/CMakeLists.txt
+++ b/testing/CMakeLists.txt
@@ -16,6 +16,7 @@ set(test_srcs
  named_values.cu
  option_parser.cu
  range.cu
+  reset_error.cu
  ring_buffer.cu
  runner.cu
  state.cu
--- a/testing/reset_error.cu
+++ b/testing/reset_error.cu
@@ -0,0 +1,30 @@
+#include <nvbench/cuda_call.cuh>
+
+#include "test_asserts.cuh"
+
+
+namespace
+{
+    __global__ void multiply5(const int32_t* __restrict__ a, int32_t* __restrict__ b)
+    {
+      const auto id = blockIdx.x * blockDim.x + threadIdx.x;
+      b[id] = 5 * a[id];
+    }
+}
+
+int main()
+{ 
+  multiply5<<<256, 256>>>(nullptr, nullptr);
+
+  try
+  {
+    NVBENCH_CUDA_CALL(cudaStreamSynchronize(0));
+    ASSERT(false);
+  }
+  catch (const std::runtime_error &)
+  {
+    ASSERT(cudaGetLastError() == cudaError_t::cudaSuccess);
+  }
+
+  return 0;
+}