Clean up blocking_kernel.

- Rename release() -> unblock() to avoid confusion with release fences.
- Remove some unused headers.
This commit is contained in:
Allison Vacanti
2021-02-14 16:07:22 -05:00
parent 1cea5e1965
commit e5914ff620
4 changed files with 7 additions and 10 deletions

View File

@@ -2,9 +2,6 @@
#include <nvbench/cuda_call.cuh>
#include <nvbench/cuda_stream.cuh>
#include <nvbench/detail/throw.cuh>
#include <fmt/format.h>
#include <cuda_runtime.h>
@@ -42,7 +39,7 @@ void blocking_kernel::block(const nvbench::cuda_stream &stream)
block_stream<<<1, 1, 0, stream>>>(m_device_flag);
}
-void blocking_kernel::release()
+void blocking_kernel::unblock()
{
volatile int& flag = m_host_flag;
flag = 1;

View File

@@ -35,15 +35,15 @@ struct cuda_stream;
* my_kernel<<<...>>>();
* NVBENCH_CUDA_CALL(cudaEventRecord(stop_event))
*
-* blocker.release();
+* blocker.unblock();
* ```
*
* The work submitted after `blocker.block(stream)` will not execute until
-* `blocker.release()` is called.
+* `blocker.unblock()` is called.
*
* ## Caveats and warnings
*
-* - Every call to `block()` must be followed by a call to `release()`.
+* - Every call to `block()` must be followed by a call to `unblock()`.
* - Do not queue "too much" work while blocking.
* - Amount of work depends on device and driver.
* - Do tests and schedule conservatively (~32 kernel launches max).
@@ -58,7 +58,7 @@ struct blocking_kernel
~blocking_kernel();
void block(const nvbench::cuda_stream &stream);
-void release();
+void unblock();
// move-only
blocking_kernel(const blocking_kernel &) = delete;

View File

@@ -118,7 +118,7 @@ private:
m_cuda_timer.stop(m_launch.get_stream());
m_cpu_timer.start();
-blocker.release();
+blocker.unblock();
NVBENCH_CUDA_CALL(cudaStreamSynchronize(m_launch.get_stream()));
m_cpu_timer.stop();

View File

@@ -116,7 +116,7 @@ private:
}
m_cpu_timer.start();
-blocker.release(); // Start executing earlier launches
+blocker.unblock(); // Start executing earlier launches
for (nvbench::int64_t i = 0; i < unblocked_launches; ++i)
{