Files
composable_kernel/include/ck_tile/host/high_res_cpu_clock.hpp
Brock Hargreaves abf13bdec1 [rocm-libraries] ROCm/rocm-libraries#4819 (commit b995a0b)
[CK] Fix windows build issues

## Motivation

The full build on Windows is currently broken due to compiler errors; this
PR should help fix that. It is also holding up the following PR in
TheRock: https://github.com/ROCm/TheRock/pull/3382

## Technical Details

1. I don't see a good reason to be nesting a Windows include inside the
ck_tile namespace. It was causing compiler errors too: Windows.h defines
min and max macros, which were conflicting with ck_tile::min and
ck_tile::max, so I moved it out. I also defined NOMINMAX to prevent those
macros from being defined in the future.
2. The TRUE/FALSE macros are already used by Windows.h, which causes an
error. So I've opted for True/False. You can see this pattern in other
rocm-libraries.
3. The M_PI macro isn't available, at least in the WIN32_LEAN_AND_MEAN
context, from \<cmath\> on Windows. We'll be able to use
std::numbers::pi_v\<float\> when we have C++20 support.
4. There was a missing \<chrono\> include.

## Test Plan

Test locally and make sure this doesn't impact existing CI.

## Test Result

Compiles locally and passes existing ci.

## Submission Checklist

- [x] Look over the contributing guidelines at
https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
2026-02-25 16:13:13 +00:00

104 lines
2.5 KiB
C++

// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#pragma once
#include <stdint.h>
#if defined(_WIN32) || defined(_WIN64)
// Windows
#if !defined(WIN32_LEAN_AND_MEAN)
#define WIN32_LEAN_AND_MEAN
#endif
#if !defined(NOMINMAX)
#define NOMINMAX
#endif
#include <Windows.h>
#else
// POSIX and fallback clocks (clock_gettime / time); included at file scope so the
// C declarations stay in the global namespace.
#include <time.h>
#endif
namespace ck_tile {

// A point in time expressed as a signed nanosecond count.
// The origin depends on the clock selected in high_res_now() (performance counter,
// CLOCK_MONOTONIC, or time()), so values are only meaningful relative to other
// timepoints produced by the same build of high_res_now().
struct timepoint_t
{
    int64_t nanoseconds;
};

// Platform-specific implementations of high_res_now().
// The system headers they need are included at file scope, above this namespace.
#if defined(_WIN32) || defined(_WIN64)
// Windows: read the performance counter and convert its ticks to nanoseconds.
static inline timepoint_t high_res_now()
{
    // Query the counter frequency (ticks per second) once and reuse it on later calls.
    static const int64_t ticks_per_second = []() {
        LARGE_INTEGER f;
        QueryPerformanceFrequency(&f);
        return static_cast<int64_t>(f.QuadPart);
    }();

    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);
    const int64_t ticks = static_cast<int64_t>(counter.QuadPart);

    // Convert with exact integer arithmetic. Multiplying the raw counter by 1e9 would
    // overflow int64_t, and going through long double is not a safe workaround because
    // MSVC gives long double the same 53-bit mantissa as double. Splitting into whole
    // seconds plus leftover ticks keeps both products small: the remainder is below
    // ticks_per_second, so remainder * 1e9 fits for any frequency under ~9.2 GHz.
    const int64_t whole_seconds   = ticks / ticks_per_second;
    const int64_t remainder_ticks = ticks % ticks_per_second;

    timepoint_t tp;
    tp.nanoseconds =
        whole_seconds * 1000000000LL + (remainder_ticks * 1000000000LL) / ticks_per_second;
    return tp;
}
#elif defined(__linux__) || defined(__unix__) || defined(_POSIX_VERSION)
// Linux/Unix/POSIX: read CLOCK_MONOTONIC and flatten the timespec to nanoseconds.
static inline timepoint_t high_res_now()
{
    // Zero-initialised so that a failing clock_gettime() produces a defined value (0)
    // instead of reading indeterminate memory.
    struct timespec ts = {};
    // Use CLOCK_MONOTONIC for consistent timing unaffected by system time changes
    // Use CLOCK_REALTIME if you need wall-clock time
    clock_gettime(CLOCK_MONOTONIC, &ts);

    timepoint_t tp;
    tp.nanoseconds =
        static_cast<int64_t>(ts.tv_sec) * 1000000000LL + static_cast<int64_t>(ts.tv_nsec);
    return tp;
}
#else
// Fallback for other platforms: time() only has one-second resolution, so the
// returned value always is a whole number of seconds scaled to nanoseconds.
static inline timepoint_t high_res_now()
{
    timepoint_t tp;
    tp.nanoseconds = static_cast<int64_t>(time(nullptr)) * 1000000000LL;
    return tp;
}
#endif

// Duration calculation functions.
// All of them compute end - start; the result is negative when end precedes start.

// Elapsed time in nanoseconds.
static inline int64_t duration_ns(timepoint_t start, timepoint_t end)
{
    return end.nanoseconds - start.nanoseconds;
}

// Elapsed time in whole microseconds (integer division, truncates toward zero).
static inline int64_t duration_us(timepoint_t start, timepoint_t end)
{
    return duration_ns(start, end) / 1000LL;
}

// Elapsed time in whole milliseconds (integer division, truncates toward zero).
static inline int64_t duration_ms(timepoint_t start, timepoint_t end)
{
    return duration_ns(start, end) / 1000000LL;
}

// Elapsed time in seconds as a double, keeping the fractional part.
static inline double duration_sec(timepoint_t start, timepoint_t end)
{
    return static_cast<double>(duration_ns(start, end)) / 1000000000.0;
}

} // namespace ck_tile