mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-04-19 22:38:52 +00:00
Clean up blocking_kernel.
- Rename release() -> unblock() to avoid confusion with release fences.
- Remove some unused headers.
This commit is contained in:
@@ -2,9 +2,6 @@
|
||||
|
||||
#include <nvbench/cuda_call.cuh>
|
||||
#include <nvbench/cuda_stream.cuh>
|
||||
#include <nvbench/detail/throw.cuh>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
@@ -42,7 +39,7 @@ void blocking_kernel::block(const nvbench::cuda_stream &stream)
|
||||
block_stream<<<1, 1, 0, stream>>>(m_device_flag);
|
||||
}
|
||||
|
||||
void blocking_kernel::release()
|
||||
void blocking_kernel::unblock()
|
||||
{
|
||||
volatile int& flag = m_host_flag;
|
||||
flag = 1;
|
||||
|
||||
@@ -35,15 +35,15 @@ struct cuda_stream;
|
||||
* my_kernel<<<...>>>();
|
||||
* NVBENCH_CUDA_CALL(cudaEventRecord(stop_event))
|
||||
*
|
||||
* blocker.release();
|
||||
* blocker.unblock();
|
||||
* ```
|
||||
*
|
||||
* The work submitted after `blocker.block(stream)` will not execute until
|
||||
* `blocker.release()` is called.
|
||||
* `blocker.unblock()` is called.
|
||||
*
|
||||
* ## Caveats and warnings
|
||||
*
|
||||
* - Every call to `block()` must be followed by a call to `release()`.
|
||||
* - Every call to `block()` must be followed by a call to `unblock()`.
|
||||
* - Do not queue "too much" work while blocking.
|
||||
* - Amount of work depends on device and driver.
|
||||
* - Do tests and schedule conservatively (~32 kernel launches max).
|
||||
@@ -58,7 +58,7 @@ struct blocking_kernel
|
||||
~blocking_kernel();
|
||||
|
||||
void block(const nvbench::cuda_stream &stream);
|
||||
void release();
|
||||
void unblock();
|
||||
|
||||
// move-only
|
||||
blocking_kernel(const blocking_kernel &) = delete;
|
||||
|
||||
@@ -118,7 +118,7 @@ private:
|
||||
m_cuda_timer.stop(m_launch.get_stream());
|
||||
|
||||
m_cpu_timer.start();
|
||||
blocker.release();
|
||||
blocker.unblock();
|
||||
NVBENCH_CUDA_CALL(cudaStreamSynchronize(m_launch.get_stream()));
|
||||
m_cpu_timer.stop();
|
||||
|
||||
|
||||
@@ -116,7 +116,7 @@ private:
|
||||
}
|
||||
|
||||
m_cpu_timer.start();
|
||||
blocker.release(); // Start executing earlier launches
|
||||
blocker.unblock(); // Start executing earlier launches
|
||||
|
||||
for (nvbench::int64_t i = 0; i < unblocked_launches; ++i)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user