diff --git a/nvbench/detail/measure_cold.cuh b/nvbench/detail/measure_cold.cuh
index 89b4201..39b2377 100644
--- a/nvbench/detail/measure_cold.cuh
+++ b/nvbench/detail/measure_cold.cuh
@@ -89,8 +89,14 @@ protected:
   }
   __forceinline__ void profiler_start() const { NVBENCH_CUDA_CALL(cudaProfilerStart()); }
   __forceinline__ void profiler_stop() const { NVBENCH_CUDA_CALL(cudaProfilerStop()); }
+  __forceinline__ void profiler_stop_noexcept() const noexcept { (void)cudaProfilerStop(); }
   void block_stream();
   __forceinline__ void unblock_stream() { m_blocker.unblock(); }
+  __forceinline__ void unblock_stream_noexcept() noexcept { m_blocker.unblock_noexcept(); }
+  __forceinline__ void sync_stream_noexcept() const noexcept
+  {
+    (void)cudaStreamSynchronize(m_launch.get_stream());
+  }
 
   nvbench::state &m_state;
 
@@ -167,54 +173,121 @@ struct measure_cold_base::kernel_launch_timer
       , m_check_throttling{check_throttling}
   {}
 
+  ~kernel_launch_timer() noexcept { this->cleanup_noexcept(); }
+
   __forceinline__ void start()
   {
-    m_measure.flush_device_l2();
-    m_measure.sync_stream();
+    try
+    {
+      m_measure.flush_device_l2();
+      m_measure.sync_stream();
 
-    // start CPU timer irrespective of use of blocking kernel
-    // Ref: https://github.com/NVIDIA/nvbench/issues/249
-    m_measure.m_cpu_timer.start();
+      // start CPU timer irrespective of use of blocking kernel
+      // Ref: https://github.com/NVIDIA/nvbench/issues/249
+      m_measure.m_cpu_timer.start();
+      m_cpu_timer_started = true;
 
-    if (!m_disable_blocking_kernel)
-    {
-      m_measure.block_stream();
+      if (!m_disable_blocking_kernel)
+      {
+        m_measure.block_stream();
+        m_stream_blocked = true;
+      }
+      if (m_check_throttling)
+      {
+        m_measure.gpu_frequency_start();
+        m_gpu_frequency_started = true;
+      }
+      if (m_run_once)
+      {
+        m_measure.profiler_start();
+        m_profiler_started = true;
+      }
+      m_measure.m_cuda_timer.start(m_measure.m_launch.get_stream());
+      m_cuda_timer_started = true;
     }
-    if (m_check_throttling)
+    catch (...)
     {
-      m_measure.gpu_frequency_start();
+      this->cleanup_noexcept();
+      throw;
     }
-    if (m_run_once)
-    {
-      m_measure.profiler_start();
-    }
-    m_measure.m_cuda_timer.start(m_measure.m_launch.get_stream());
   }
 
   __forceinline__ void stop()
   {
-    m_measure.m_cuda_timer.stop(m_measure.m_launch.get_stream());
-    if (m_check_throttling)
+    try
     {
-      m_measure.gpu_frequency_stop();
+      if (m_cuda_timer_started)
+      {
+        m_measure.m_cuda_timer.stop(m_measure.m_launch.get_stream());
+        m_cuda_timer_started = false;
+      }
+      if (m_gpu_frequency_started)
+      {
+        m_measure.gpu_frequency_stop();
+        m_gpu_frequency_started = false;
+      }
+      if (m_stream_blocked)
+      {
+        m_measure.unblock_stream();
+        m_stream_blocked = false;
+      }
+      m_measure.sync_stream();
+      if (m_profiler_started)
+      {
+        m_measure.profiler_stop();
+        m_profiler_started = false;
+      }
+      if (m_cpu_timer_started)
+      {
+        m_measure.m_cpu_timer.stop();
+        m_cpu_timer_started = false;
+      }
     }
-    if (!m_disable_blocking_kernel)
+    catch (...)
     {
-      m_measure.unblock_stream();
+      this->cleanup_noexcept();
+      throw;
     }
-    m_measure.sync_stream();
-    if (m_run_once)
-    {
-      m_measure.profiler_stop();
-    }
-    m_measure.m_cpu_timer.stop();
   }
 
 private:
+  void cleanup_noexcept() noexcept
+  {
+    const bool needs_sync = m_stream_blocked || m_cuda_timer_started || m_gpu_frequency_started;
+
+    if (m_stream_blocked)
+    {
+      m_measure.unblock_stream_noexcept();
+      m_stream_blocked = false;
+    }
+    if (needs_sync)
+    {
+      m_measure.sync_stream_noexcept();
+    }
+    if (m_profiler_started)
+    {
+      m_measure.profiler_stop_noexcept();
+      m_profiler_started = false;
+    }
+    if (m_cpu_timer_started)
+    {
+      m_measure.m_cpu_timer.stop();
+      m_cpu_timer_started = false;
+    }
+
+    m_cuda_timer_started    = false;
+    m_gpu_frequency_started = false;
+  }
+
   measure_cold_base &m_measure;
   bool m_disable_blocking_kernel;
   bool m_run_once;
   bool m_check_throttling;
+  bool m_cpu_timer_started{false};
+  bool m_stream_blocked{false};
+  bool m_gpu_frequency_started{false};
+  bool m_profiler_started{false};
+  bool m_cuda_timer_started{false};
 };
 
 template <typename KernelLauncher>