From c42cd283708bd56166395784ba6c603847cea3e4 Mon Sep 17 00:00:00 2001
From: Cong Ma <142121551+CongMa13@users.noreply.github.com>
Date: Mon, 19 Jan 2026 16:31:02 -0700
Subject: [PATCH] [CK TILE] remove dependency on std chrono (#3599)

* [CK TILE] remove dependency on std chrono

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
[ROCm/composable_kernel commit: 0517d43d312356c62cc33bea4f0ecc5613e87079]
---
 include/ck_tile/host.hpp                    |  1 +
 include/ck_tile/host/high_res_cpu_clock.hpp | 95 +++++++++++++++++++++
 include/ck_tile/host/timer.hpp              | 16 ++--
 3 files changed, 103 insertions(+), 9 deletions(-)
 create mode 100644 include/ck_tile/host/high_res_cpu_clock.hpp

diff --git a/include/ck_tile/host.hpp b/include/ck_tile/host.hpp
index b543fd84e9..014fcfdd65 100644
--- a/include/ck_tile/host.hpp
+++ b/include/ck_tile/host.hpp
@@ -11,6 +11,7 @@
 #include "ck_tile/host/device_prop.hpp"
 #include "ck_tile/host/fill.hpp"
 #include "ck_tile/host/flush_icache.hpp"
+#include "ck_tile/host/high_res_cpu_clock.hpp"
 #include "ck_tile/host/hip_check_error.hpp"
 #include "ck_tile/host/host_tensor.hpp"
 #include "ck_tile/host/joinable_thread.hpp"
diff --git a/include/ck_tile/host/high_res_cpu_clock.hpp b/include/ck_tile/host/high_res_cpu_clock.hpp
new file mode 100644
index 0000000000..c86f7368d4
--- /dev/null
+++ b/include/ck_tile/host/high_res_cpu_clock.hpp
@@ -0,0 +1,95 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <cstdint>
+
+// Platform headers are included at global scope so they are not declared inside ck_tile.
+#if defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#else
+#include <time.h>
+#endif
+
+namespace ck_tile {
+
+// Time structure to hold nanoseconds since epoch or arbitrary start point
+struct timepoint_t
+{
+    int64_t nanoseconds = 0; // zero by default so an unset timepoint is never indeterminate
+};
+
+#if defined(_WIN32) || defined(_WIN64)
+// Windows: QueryPerformanceCounter ticks scaled to nanoseconds by the counter frequency
+static inline timepoint_t high_res_now()
+{
+    // Cache the performance counter frequency; it is constant for the system lifetime.
+    static LARGE_INTEGER frequency = []() {
+        LARGE_INTEGER f;
+        QueryPerformanceFrequency(&f);
+        return f;
+    }();
+
+    LARGE_INTEGER counter;
+    timepoint_t tp;
+    QueryPerformanceCounter(&counter);
+
+    // Convert to nanoseconds using floating-point to avoid 64-bit integer overflow
+    tp.nanoseconds =
+        static_cast<int64_t>((static_cast<long double>(counter.QuadPart) * 1000000000.0L) /
+                             static_cast<long double>(frequency.QuadPart));
+
+    return tp;
+}
+
+#elif defined(__linux__) || defined(__unix__) || defined(_POSIX_VERSION)
+// Linux/Unix/POSIX: clock_gettime() seconds + nanoseconds folded into one nanosecond count
+static inline timepoint_t high_res_now()
+{
+    struct timespec ts;
+    timepoint_t tp;
+
+    // Use CLOCK_MONOTONIC for consistent timing unaffected by system time changes
+    // Use CLOCK_REALTIME if you need wall-clock time
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+
+    tp.nanoseconds = static_cast<int64_t>(ts.tv_sec * 1000000000LL + ts.tv_nsec);
+
+    return tp;
+}
+
+#else
+// Fallback for other platforms: time() scaled to nanoseconds (whole-second resolution only)
+static inline timepoint_t high_res_now()
+{
+    timepoint_t tp;
+    time_t t       = time(NULL);
+    tp.nanoseconds = static_cast<int64_t>(t * 1000000000LL);
+    return tp;
+}
+
+#endif
+
+// Duration helpers: elapsed time from start to end; the us/ms variants use integer division
+static inline int64_t duration_ns(timepoint_t start, timepoint_t end)
+{
+    return end.nanoseconds - start.nanoseconds;
+}
+
+static inline int64_t duration_us(timepoint_t start, timepoint_t end)
+{
+    return (end.nanoseconds - start.nanoseconds) / 1000LL;
+}
+
+static inline int64_t duration_ms(timepoint_t start, timepoint_t end)
+{
+    return (end.nanoseconds - start.nanoseconds) / 1000000LL;
+}
+
+static inline double duration_sec(timepoint_t start, timepoint_t end)
+{
+    return static_cast<double>(end.nanoseconds - start.nanoseconds) / 1000000000.0;
+}
+
+} // namespace ck_tile
diff --git a/include/ck_tile/host/timer.hpp b/include/ck_tile/host/timer.hpp
index 1d641d1812..a300c877e8 100644
--- a/include/ck_tile/host/timer.hpp
+++ b/include/ck_tile/host/timer.hpp
@@ -5,9 +5,9 @@
 
 #include "ck_tile/core/config.hpp"
 #include "ck_tile/host/hip_check_error.hpp"
+#include "ck_tile/host/high_res_cpu_clock.hpp"
 #include <hip/hip_runtime.h>
 #include <cstddef>
-#include <chrono>
 
 namespace ck_tile {
 
@@ -54,26 +54,24 @@ struct cpu_timer
     CK_TILE_HOST void start(const hipStream_t& s)
     {
         HIP_CHECK_ERROR(hipStreamSynchronize(s));
-        start_tick = std::chrono::high_resolution_clock::now();
+        start_tick = high_res_now();
     }
     // torch.utils.benchmark.Timer(), there is a sync inside each timer callback
     CK_TILE_HOST void stop(const hipStream_t& s)
     {
         HIP_CHECK_ERROR(hipStreamSynchronize(s));
-        stop_tick = std::chrono::high_resolution_clock::now();
+        stop_tick = high_res_now();
     }
     // return in ms
     CK_TILE_HOST float duration() const
     {
-        double sec =
-            std::chrono::duration_cast<std::chrono::duration<double>>(stop_tick - start_tick)
-                .count();
-        return static_cast<float>(sec * 1e3);
+        // Convert from nanoseconds so sub-microsecond precision survives the ms conversion.
+        return static_cast<float>(static_cast<double>(duration_ns(start_tick, stop_tick)) * 1e-6);
     }
 
     private:
-    std::chrono::time_point<std::chrono::high_resolution_clock> start_tick;
-    std::chrono::time_point<std::chrono::high_resolution_clock> stop_tick;
+    timepoint_t start_tick;
+    timepoint_t stop_tick;
 };
 
 } // namespace ck_tile