Files
nvbench/nvbench/exec_tag.cuh
Georgy Evtushenko 2ef69e9ba6 No batch exec tag
2023-11-02 11:10:27 -07:00

129 lines
4.0 KiB
Plaintext

/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <nvbench/flags.cuh>
#include <type_traits>
namespace nvbench::detail
{
// Bit flags describing how a benchmark is executed and measured.
// The user-facing documentation lives on the similarly named tags in
// nvbench::exec_tag::.
enum class exec_flag
{
  none = 0,

  // Modifiers (low byte):
  timer    = 1 << 0, // KernelLauncher uses manual timing
  no_block = 1 << 1, // Disables use of `blocking_kernel`.
  sync     = 1 << 2, // KernelLauncher has indicated that it will sync
  run_once = 1 << 3, // Only run the benchmark once (for profiling).

  // Every modifier bit combined.
  modifier_mask = timer | no_block | sync | run_once,

  // Measurement types (second byte):
  cold = 1 << 8, // measure_cold
  hot  = 1 << 9, // measure_hot

  // Every measurement bit combined.
  measure_mask = cold | hot
};
} // namespace nvbench::detail
// Applies the NVBENCH_DECLARE_FLAGS macro to exec_flag.
// NOTE(review): the macro is presumably defined in <nvbench/flags.cuh> and
// presumably supplies the `|`, `&` and `~` operators that
// nvbench::exec_tag::impl::tag applies to exec_flag values (scoped enums have
// no built-in bitwise operators) -- confirm against the macro definition.
NVBENCH_DECLARE_FLAGS(nvbench::detail::exec_flag)
namespace nvbench::exec_tag
{
namespace impl
{
/// Empty marker type; inheriting from it identifies a type as an exec tag.
struct tag_base {};

/// True when `ExecTag` is `tag_base` itself or a class derived from it.
template <typename ExecTag>
inline constexpr bool is_exec_tag_v =
  std::is_base_of<tag_base, ExecTag>::value;
/// Compile-time carrier for a set of `exec_flag`s.
///
/// Embedding the flags in a type gives the tags flag semantics at compile
/// time; this allows state::exec to only instantiate the measurements that
/// are actually used.
///
/// @tparam Flags The flag set represented by this tag type.
template <nvbench::detail::exec_flag Flags>
struct tag
    : std::integral_constant<nvbench::detail::exec_flag, Flags>
    , tag_base
{
  /// The flag set carried by this tag type.
  static constexpr nvbench::detail::exec_flag flags = Flags;

  /// Combines two tags: the result is the tag type for `Flags | OFlags`.
  template <nvbench::detail::exec_flag OFlags>
  constexpr auto operator|(tag<OFlags>) const
  {
    using combined_t = tag<Flags | OFlags>;
    return combined_t{};
  }

  /// Masks one tag with another: the result is the tag type for
  /// `Flags & OFlags`.
  template <nvbench::detail::exec_flag OFlags>
  constexpr auto operator&(tag<OFlags>) const
  {
    using masked_t = tag<Flags & OFlags>;
    return masked_t{};
  }

  /// The result is the tag type for `~Flags`.
  constexpr auto operator~() const
  {
    using inverted_t = tag<~Flags>;
    return inverted_t{};
  }

  /// Implicit truthiness test: false only when `Flags` equals
  /// `exec_flag::none`.
  constexpr operator bool() const // NOLINT(google-explicit-constructor)
  {
    constexpr auto no_flags = nvbench::detail::exec_flag::none;
    return Flags != no_flags;
  }
};
// Each flag gets a tag type alias (`*_t`) and a constexpr instance of it.

// No flags set.
using none_t = tag<nvbench::detail::exec_flag::none>;
inline constexpr none_t none{};

// Modifiers:
using timer_t = tag<nvbench::detail::exec_flag::timer>;
inline constexpr timer_t timer{};

using no_block_t = tag<nvbench::detail::exec_flag::no_block>;
inline constexpr no_block_t no_block{};

using sync_t = tag<nvbench::detail::exec_flag::sync>;
inline constexpr sync_t sync{};

using run_once_t = tag<nvbench::detail::exec_flag::run_once>;
inline constexpr run_once_t run_once{};

// Measurement types:
using cold_t = tag<nvbench::detail::exec_flag::cold>;
inline constexpr cold_t cold{};

using hot_t = tag<nvbench::detail::exec_flag::hot>;
inline constexpr hot_t hot{};

// Masks:
using modifier_mask_t = tag<nvbench::detail::exec_flag::modifier_mask>;
inline constexpr modifier_mask_t modifier_mask{};

using measure_mask_t = tag<nvbench::detail::exec_flag::measure_mask>;
inline constexpr measure_mask_t measure_mask{};
} // namespace impl
constexpr inline auto none = nvbench::exec_tag::impl::none;
/// Modifier used when only a portion of the KernelLauncher needs to be timed.
/// Useful for resetting state in-between timed kernel launches.
constexpr inline auto timer = nvbench::exec_tag::impl::timer;
/// Modifier used to indicate that the KernelGenerator will perform CUDA
/// synchronizations. Without this flag such benchmarks will deadlock.
constexpr inline auto sync = nvbench::exec_tag::impl::no_block | nvbench::exec_tag::impl::sync;
/// Modifier used to indicate that batched measurements should be disabled
constexpr inline auto no_batch = nvbench::exec_tag::impl::cold;
} // namespace nvbench::exec_tag