mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-04-29 19:21:14 +00:00
Don't override m_check_throttling if throttling threshold is non-positive
The measure_cold class now inherits m_check_throttling directly from the state. This ensures that, when `--jsonbin` is specified, the frequency data corresponding to the timing data are available to be written out.
This commit is contained in:
@@ -44,7 +44,7 @@ measure_cold_base::measure_cold_base(state &exec_state)
|
|||||||
exec_state.get_stopping_criterion())}
|
exec_state.get_stopping_criterion())}
|
||||||
, m_disable_blocking_kernel{exec_state.get_disable_blocking_kernel()}
|
, m_disable_blocking_kernel{exec_state.get_disable_blocking_kernel()}
|
||||||
, m_run_once{exec_state.get_run_once()}
|
, m_run_once{exec_state.get_run_once()}
|
||||||
, m_check_throttling(!exec_state.get_run_once() && exec_state.get_throttle_threshold() > 0.f)
|
, m_check_throttling(!exec_state.get_run_once())
|
||||||
, m_min_samples{exec_state.get_min_samples()}
|
, m_min_samples{exec_state.get_min_samples()}
|
||||||
, m_skip_time{exec_state.get_skip_time()}
|
, m_skip_time{exec_state.get_skip_time()}
|
||||||
, m_timeout{exec_state.get_timeout()}
|
, m_timeout{exec_state.get_timeout()}
|
||||||
|
|||||||
@@ -140,6 +140,13 @@ struct measure_cold_base::kernel_launch_timer
|
|||||||
, m_check_throttling{measure.m_check_throttling}
|
, m_check_throttling{measure.m_check_throttling}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
explicit kernel_launch_timer(measure_cold_base &measure, bool disable_blocking_kernel)
|
||||||
|
: m_measure{measure}
|
||||||
|
, m_disable_blocking_kernel{disable_blocking_kernel}
|
||||||
|
, m_run_once{measure.m_run_once}
|
||||||
|
, m_check_throttling{measure.m_check_throttling}
|
||||||
|
{}
|
||||||
|
|
||||||
explicit kernel_launch_timer(measure_cold_base &measure,
|
explicit kernel_launch_timer(measure_cold_base &measure,
|
||||||
bool disable_blocking_kernel,
|
bool disable_blocking_kernel,
|
||||||
bool run_once,
|
bool run_once,
|
||||||
@@ -234,7 +241,7 @@ private:
|
|||||||
// disable use of blocking kernel for warm-up run
|
// disable use of blocking kernel for warm-up run
|
||||||
// see https://github.com/NVIDIA/nvbench/issues/240
|
// see https://github.com/NVIDIA/nvbench/issues/240
|
||||||
constexpr bool disable_blocking_kernel = true;
|
constexpr bool disable_blocking_kernel = true;
|
||||||
kernel_launch_timer timer(*this, disable_blocking_kernel, m_run_once, m_check_throttling);
|
kernel_launch_timer timer(*this, disable_blocking_kernel);
|
||||||
|
|
||||||
this->launch_kernel(timer);
|
this->launch_kernel(timer);
|
||||||
this->check_skip_time(m_cuda_timer.get_duration());
|
this->check_skip_time(m_cuda_timer.get_duration());
|
||||||
@@ -245,7 +252,7 @@ private:
|
|||||||
// do not use blocking kernel if benchmark is only run once, e.g., when profiling
|
// do not use blocking kernel if benchmark is only run once, e.g., when profiling
|
||||||
// ref: https://github.com/NVIDIA/nvbench/issues/242
|
// ref: https://github.com/NVIDIA/nvbench/issues/242
|
||||||
const bool disable_blocking_kernel = m_run_once || m_disable_blocking_kernel;
|
const bool disable_blocking_kernel = m_run_once || m_disable_blocking_kernel;
|
||||||
kernel_launch_timer timer(*this, disable_blocking_kernel, m_run_once, m_check_throttling);
|
kernel_launch_timer timer(*this, disable_blocking_kernel);
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
this->launch_kernel(timer);
|
this->launch_kernel(timer);
|
||||||
|
|||||||
Reference in New Issue
Block a user