mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-03-25 17:47:47 +00:00
[CK] Fix Windows build issues ## Motivation Full build on Windows is currently broken due to compiler errors; this PR should help fix that. This is also holding up the following PR in TheRock: https://github.com/ROCm/TheRock/pull/3382 ## Technical Details 1. I don't see a good reason to be nesting a Windows include inside the ck_tile namespace. It was causing compiler errors too: Windows.h comes with min and max, which was conflicting with ck_tile::min and ck_tile::max, so I moved it out. I also defined NOMINMAX to prevent this inclusion in the future. 2. The TRUE/FALSE macros are already used by Windows.h, which causes an error. So I've opted for True/False. You can see this pattern in other rocm-libraries. 3. The M_PI macro isn't available, at least in the WIN32_LEAN_AND_MEAN context, from \<cmath\> on Windows. We'll be able to use std::numbers::pi_v\<float\> when we have C++20 support. 4. There was a missing \<chrono\> include. ## Test Plan Test locally and make sure this doesn't impact existing CI. ## Test Result Compiles locally and passes existing CI. ## Submission Checklist - [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
104 lines
2.5 KiB
C++
104 lines
2.5 KiB
C++
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
#pragma once
|
|
|
|
#include <stdint.h>

#if defined(_WIN32) || defined(_WIN64)
// Windows
#if !defined(WIN32_LEAN_AND_MEAN)
#define WIN32_LEAN_AND_MEAN
#endif
#if !defined(NOMINMAX)
#define NOMINMAX
#endif
#include <Windows.h>
#else
// Linux/Unix/POSIX and the generic fallback: included at file scope so the
// C library declarations stay in the global namespace.
#include <time.h>
#endif
|
|
|
|
namespace ck_tile {

/// A point in time stored as a signed count of nanoseconds.
///
/// The origin is whatever the clock behind high_res_now() uses (epoch or an
/// arbitrary start point), so a value is only meaningful when compared with
/// another value obtained from the same clock.
struct timepoint_t
{
    int64_t nanoseconds;
};

// Platform-specific implementations of high_res_now().
// The system headers they rely on are included at file scope, never inside
// this namespace: including a C header here would declare its contents as
// ck_tile:: members and its include guard would then hide them from any later
// global-scope inclusion.
#if defined(_WIN32) || defined(_WIN64)

/// Returns the current value of the Windows performance counter, converted
/// to nanoseconds.
static inline timepoint_t high_res_now()
{
    // Cache the performance counter frequency; it is constant for the system lifetime.
    static const int64_t ticks_per_second = []() {
        LARGE_INTEGER f;
        QueryPerformanceFrequency(&f);
        return static_cast<int64_t>(f.QuadPart);
    }();

    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);

    // Convert with integer arithmetic only. Splitting into whole seconds and a
    // sub-second remainder keeps every intermediate product well inside the
    // int64_t range (the remainder is always smaller than the frequency) and,
    // unlike a floating-point conversion, does not round the result.
    const int64_t ticks         = static_cast<int64_t>(counter.QuadPart);
    const int64_t whole_seconds = ticks / ticks_per_second;
    const int64_t leftover      = ticks % ticks_per_second;

    timepoint_t tp;
    tp.nanoseconds =
        whole_seconds * 1000000000LL + (leftover * 1000000000LL) / ticks_per_second;
    return tp;
}

#elif defined(__linux__) || defined(__unix__) || defined(_POSIX_VERSION)

/// Returns the current CLOCK_MONOTONIC reading in nanoseconds.
static inline timepoint_t high_res_now()
{
    // Zero-initialised so that a failing clock_gettime() produces a timepoint
    // of 0 instead of reading indeterminate memory.
    struct timespec ts = {};

    // Use CLOCK_MONOTONIC for consistent timing unaffected by system time changes
    // Use CLOCK_REALTIME if you need wall-clock time
    clock_gettime(CLOCK_MONOTONIC, &ts);

    timepoint_t tp;
    tp.nanoseconds =
        static_cast<int64_t>(ts.tv_sec) * 1000000000LL + static_cast<int64_t>(ts.tv_nsec);
    return tp;
}

#else

/// Fallback for other platforms: current calendar time from time(), scaled
/// to nanoseconds. The resolution is therefore one second at best.
static inline timepoint_t high_res_now()
{
    timepoint_t tp;
    tp.nanoseconds = static_cast<int64_t>(time(nullptr)) * 1000000000LL;
    return tp;
}

#endif

// Duration calculation functions

/// Elapsed time from `start` to `end` in nanoseconds (negative if `end`
/// precedes `start`).
static inline int64_t duration_ns(timepoint_t start, timepoint_t end)
{
    return end.nanoseconds - start.nanoseconds;
}

/// Elapsed time from `start` to `end` in whole microseconds; the fractional
/// part is truncated toward zero.
static inline int64_t duration_us(timepoint_t start, timepoint_t end)
{
    return (end.nanoseconds - start.nanoseconds) / 1000LL;
}

/// Elapsed time from `start` to `end` in whole milliseconds; the fractional
/// part is truncated toward zero.
static inline int64_t duration_ms(timepoint_t start, timepoint_t end)
{
    return (end.nanoseconds - start.nanoseconds) / 1000000LL;
}

/// Elapsed time from `start` to `end` in seconds as a floating-point value.
static inline double duration_sec(timepoint_t start, timepoint_t end)
{
    return static_cast<double>(end.nanoseconds - start.nanoseconds) / 1000000000.0;
}

} // namespace ck_tile
|