mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Better place to stop freq timer?
This commit is contained in:
committed by
Oleksandr Pavlyk
parent
a487a38895
commit
40b2f4ece2
@@ -137,25 +137,33 @@ struct measure_cold_base::kernel_launch_timer
|
|||||||
: m_measure{measure}
|
: m_measure{measure}
|
||||||
, m_disable_blocking_kernel{measure.m_disable_blocking_kernel}
|
, m_disable_blocking_kernel{measure.m_disable_blocking_kernel}
|
||||||
, m_run_once{measure.m_run_once}
|
, m_run_once{measure.m_run_once}
|
||||||
|
, m_check_throttling{measure.m_check_throttling}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
explicit kernel_launch_timer(measure_cold_base &measure,
|
explicit kernel_launch_timer(measure_cold_base &measure,
|
||||||
bool disable_blocking_kernel,
|
bool disable_blocking_kernel,
|
||||||
bool run_once)
|
bool run_once,
|
||||||
|
bool check_throttling)
|
||||||
: m_measure{measure}
|
: m_measure{measure}
|
||||||
, m_disable_blocking_kernel{disable_blocking_kernel}
|
, m_disable_blocking_kernel{disable_blocking_kernel}
|
||||||
, m_run_once{run_once}
|
, m_run_once{run_once}
|
||||||
|
, m_check_throttling{check_throttling}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
__forceinline__ void start()
|
__forceinline__ void start()
|
||||||
{
|
{
|
||||||
m_measure.flush_device_l2();
|
m_measure.flush_device_l2();
|
||||||
m_measure.sync_stream();
|
m_measure.sync_stream();
|
||||||
|
|
||||||
|
// start CPU timer irrespective of use of blocking kernel
|
||||||
|
// Ref: https://github.com/NVIDIA/nvbench/issues/249
|
||||||
|
m_measure.m_cpu_timer.start();
|
||||||
|
|
||||||
if (!m_disable_blocking_kernel)
|
if (!m_disable_blocking_kernel)
|
||||||
{
|
{
|
||||||
m_measure.block_stream();
|
m_measure.block_stream();
|
||||||
}
|
}
|
||||||
if (m_measure.m_check_throttling)
|
if (m_check_throttling)
|
||||||
{
|
{
|
||||||
m_measure.gpu_frequency_start();
|
m_measure.gpu_frequency_start();
|
||||||
}
|
}
|
||||||
@@ -164,22 +172,19 @@ struct measure_cold_base::kernel_launch_timer
|
|||||||
m_measure.profiler_start();
|
m_measure.profiler_start();
|
||||||
}
|
}
|
||||||
m_measure.m_cuda_timer.start(m_measure.m_launch.get_stream());
|
m_measure.m_cuda_timer.start(m_measure.m_launch.get_stream());
|
||||||
// start CPU timer irrespective of use of blocking kernel
|
|
||||||
// Ref: https://github.com/NVIDIA/nvbench/issues/249
|
|
||||||
m_measure.m_cpu_timer.start();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline__ void stop()
|
__forceinline__ void stop()
|
||||||
{
|
{
|
||||||
m_measure.m_cuda_timer.stop(m_measure.m_launch.get_stream());
|
m_measure.m_cuda_timer.stop(m_measure.m_launch.get_stream());
|
||||||
|
if (m_check_throttling)
|
||||||
|
{
|
||||||
|
m_measure.gpu_frequency_stop();
|
||||||
|
}
|
||||||
if (!m_disable_blocking_kernel)
|
if (!m_disable_blocking_kernel)
|
||||||
{
|
{
|
||||||
m_measure.unblock_stream();
|
m_measure.unblock_stream();
|
||||||
}
|
}
|
||||||
if (m_measure.m_check_throttling)
|
|
||||||
{
|
|
||||||
m_measure.gpu_frequency_stop();
|
|
||||||
}
|
|
||||||
m_measure.sync_stream();
|
m_measure.sync_stream();
|
||||||
if (m_run_once)
|
if (m_run_once)
|
||||||
{
|
{
|
||||||
@@ -192,6 +197,7 @@ private:
|
|||||||
measure_cold_base &m_measure;
|
measure_cold_base &m_measure;
|
||||||
bool m_disable_blocking_kernel;
|
bool m_disable_blocking_kernel;
|
||||||
bool m_run_once;
|
bool m_run_once;
|
||||||
|
bool m_check_throttling;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename KernelLauncher>
|
template <typename KernelLauncher>
|
||||||
@@ -228,7 +234,7 @@ private:
|
|||||||
// disable use of blocking kernel for warm-up run
|
// disable use of blocking kernel for warm-up run
|
||||||
// see https://github.com/NVIDIA/nvbench/issues/240
|
// see https://github.com/NVIDIA/nvbench/issues/240
|
||||||
constexpr bool disable_blocking_kernel = true;
|
constexpr bool disable_blocking_kernel = true;
|
||||||
kernel_launch_timer timer(*this, disable_blocking_kernel, m_run_once);
|
kernel_launch_timer timer(*this, disable_blocking_kernel, m_run_once, m_check_throttling);
|
||||||
|
|
||||||
this->launch_kernel(timer);
|
this->launch_kernel(timer);
|
||||||
this->check_skip_time(m_cuda_timer.get_duration());
|
this->check_skip_time(m_cuda_timer.get_duration());
|
||||||
@@ -239,7 +245,7 @@ private:
|
|||||||
// do not use blocking kernel if benchmark is only run once, e.g., when profiling
|
// do not use blocking kernel if benchmark is only run once, e.g., when profiling
|
||||||
// ref: https://github.com/NVIDIA/nvbench/issue/242
|
// ref: https://github.com/NVIDIA/nvbench/issue/242
|
||||||
const bool disable_blocking_kernel = m_run_once || m_disable_blocking_kernel;
|
const bool disable_blocking_kernel = m_run_once || m_disable_blocking_kernel;
|
||||||
kernel_launch_timer timer(*this, disable_blocking_kernel, m_run_once);
|
kernel_launch_timer timer(*this, disable_blocking_kernel, m_run_once, m_check_throttling);
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
this->launch_kernel(timer);
|
this->launch_kernel(timer);
|
||||||
|
|||||||
Reference in New Issue
Block a user