mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-05 22:22:27 +00:00
This commit is contained in:
103
include/ck_tile/host/high_res_cpu_clock.hpp
Normal file
103
include/ck_tile/host/high_res_cpu_clock.hpp
Normal file
@@ -0,0 +1,103 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
// Windows
|
||||
#if !defined(WIN32_LEAN_AND_MEAN)
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
#if !defined(NOMINMAX)
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
// Time structure to hold nanoseconds since epoch or arbitrary start point
|
||||
struct timepoint_t
|
||||
{
|
||||
int64_t nanoseconds;
|
||||
};
|
||||
|
||||
// Platform-specific includes and implementation
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
static inline timepoint_t high_res_now()
|
||||
{
|
||||
// Cache the performance counter frequency; it is constant for the system lifetime.
|
||||
static LARGE_INTEGER frequency = []() {
|
||||
LARGE_INTEGER f;
|
||||
QueryPerformanceFrequency(&f);
|
||||
return f;
|
||||
}();
|
||||
|
||||
LARGE_INTEGER counter;
|
||||
timepoint_t tp;
|
||||
QueryPerformanceCounter(&counter);
|
||||
|
||||
// Convert to nanoseconds using floating-point to avoid 64-bit integer overflow
|
||||
tp.nanoseconds =
|
||||
static_cast<int64_t>((static_cast<long double>(counter.QuadPart) * 1000000000.0L) /
|
||||
static_cast<long double>(frequency.QuadPart));
|
||||
|
||||
return tp;
|
||||
}
|
||||
|
||||
#elif defined(__linux__) || defined(__unix__) || defined(_POSIX_VERSION)
|
||||
// Linux/Unix/POSIX
|
||||
#include <time.h>
|
||||
|
||||
static inline timepoint_t high_res_now()
|
||||
{
|
||||
struct timespec ts;
|
||||
timepoint_t tp;
|
||||
|
||||
// Use CLOCK_MONOTONIC for consistent timing unaffected by system time changes
|
||||
// Use CLOCK_REALTIME if you need wall-clock time
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
|
||||
tp.nanoseconds = static_cast<int64_t>(ts.tv_sec * 1000000000LL + ts.tv_nsec);
|
||||
|
||||
return tp;
|
||||
}
|
||||
|
||||
#else
|
||||
// Fallback for other platforms
|
||||
#include <time.h>
|
||||
|
||||
static inline timepoint_t high_res_now()
|
||||
{
|
||||
timepoint_t tp;
|
||||
time_t t = time(NULL);
|
||||
tp.nanoseconds = static_cast<int64_t>(t * 1000000000LL);
|
||||
return tp;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Duration calculation functions
|
||||
static inline int64_t duration_ns(timepoint_t start, timepoint_t end)
|
||||
{
|
||||
return end.nanoseconds - start.nanoseconds;
|
||||
}
|
||||
|
||||
static inline int64_t duration_us(timepoint_t start, timepoint_t end)
|
||||
{
|
||||
return (end.nanoseconds - start.nanoseconds) / 1000LL;
|
||||
}
|
||||
|
||||
static inline int64_t duration_ms(timepoint_t start, timepoint_t end)
|
||||
{
|
||||
return (end.nanoseconds - start.nanoseconds) / 1000000LL;
|
||||
}
|
||||
|
||||
static inline double duration_sec(timepoint_t start, timepoint_t end)
|
||||
{
|
||||
return static_cast<double>(end.nanoseconds - start.nanoseconds) / 1000000000.0;
|
||||
}
|
||||
|
||||
} // namespace ck_tile
|
||||
Reference in New Issue
Block a user