Files
nvbench/nvbench/cuda_timer.cuh
clang-format 8f1152d4a2 Auto format
2022-08-23 02:34:35 +04:00

87 lines
2.2 KiB
Plaintext

/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <nvbench/cuda_call.cuh>
#include <nvbench/types.cuh>
#include <cuda_runtime_api.h>
namespace nvbench
{
struct cuda_timer
{
__forceinline__ cuda_timer()
{
NVBENCH_CUDA_CALL(cudaEventCreate(&m_start));
NVBENCH_CUDA_CALL(cudaEventCreate(&m_stop));
}
__forceinline__ ~cuda_timer()
{
NVBENCH_CUDA_CALL(cudaEventDestroy(m_start));
NVBENCH_CUDA_CALL(cudaEventDestroy(m_stop));
}
// move-only
cuda_timer(const cuda_timer &) = delete;
cuda_timer(cuda_timer &&) = default;
cuda_timer &operator=(const cuda_timer &) = delete;
cuda_timer &operator=(cuda_timer &&) = default;
__forceinline__ void start(cudaStream_t stream)
{
NVBENCH_CUDA_CALL(cudaEventRecord(m_start, stream));
}
__forceinline__ void stop(cudaStream_t stream)
{
NVBENCH_CUDA_CALL(cudaEventRecord(m_stop, stream));
}
[[nodiscard]] __forceinline__ bool ready() const
{
const cudaError_t state = cudaEventQuery(m_stop);
if (state == cudaErrorNotReady)
{
return false;
}
NVBENCH_CUDA_CALL(state);
return true;
}
// In seconds:
[[nodiscard]] __forceinline__ nvbench::float64_t get_duration() const
{
NVBENCH_CUDA_CALL(cudaEventSynchronize(m_stop));
float elapsed_time;
// According to docs, this is in ms with a resolution of ~0.5 microseconds.
NVBENCH_CUDA_CALL(cudaEventElapsedTime(&elapsed_time, m_start, m_stop));
return elapsed_time / 1000.0;
}
private:
cudaEvent_t m_start;
cudaEvent_t m_stop;
};
} // namespace nvbench