mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
[CK TILE] remove dependency on std chrono (#3599)
* [CK TILE] remove dependency on std chrono * Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -11,6 +11,7 @@
|
||||
#include "ck_tile/host/device_prop.hpp"
|
||||
#include "ck_tile/host/fill.hpp"
|
||||
#include "ck_tile/host/flush_icache.hpp"
|
||||
#include "ck_tile/host/high_res_cpu_clock.hpp"
|
||||
#include "ck_tile/host/hip_check_error.hpp"
|
||||
#include "ck_tile/host/host_tensor.hpp"
|
||||
#include "ck_tile/host/joinable_thread.hpp"
|
||||
|
||||
95
include/ck_tile/host/high_res_cpu_clock.hpp
Normal file
95
include/ck_tile/host/high_res_cpu_clock.hpp
Normal file
@@ -0,0 +1,95 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// Platform headers are pulled in at global scope, before namespace ck_tile is opened.
// Including a system header from inside a namespace declares its contents as ck_tile::* and,
// because of the header's own include guard, hides them from every later global-scope include
// in the same translation unit.
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#else
#include <time.h>
#endif

namespace ck_tile {

// A point in time, stored as a nanosecond count measured from the epoch or from an arbitrary
// platform-dependent origin (see high_res_now()). Intended to be compared against another
// timepoint_t obtained from the same clock via the duration_*() helpers below.
struct timepoint_t
{
    int64_t nanoseconds;
};

#if defined(_WIN32) || defined(_WIN64)

// Windows: read the performance counter and convert its tick count to nanoseconds.
static inline timepoint_t high_res_now()
{
    constexpr int64_t ns_per_sec = 1000000000LL;

    // The performance-counter frequency is constant for the system lifetime: query it once.
    static const int64_t ticks_per_sec = []() {
        LARGE_INTEGER f;
        QueryPerformanceFrequency(&f);
        return static_cast<int64_t>(f.QuadPart);
    }();

    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);
    const int64_t ticks = static_cast<int64_t>(counter.QuadPart);

    // Convert whole seconds and the sub-second remainder separately. This stays exact in
    // 64-bit integer arithmetic and cannot overflow the way (ticks * 1e9) would, without
    // relying on long double (which is no wider than double on MSVC-ABI toolchains).
    const int64_t whole_ns = (ticks / ticks_per_sec) * ns_per_sec;
    const int64_t part_ns  = (ticks % ticks_per_sec) * ns_per_sec / ticks_per_sec;

    return timepoint_t{whole_ns + part_ns};
}

#elif defined(__linux__) || defined(__unix__) || defined(_POSIX_VERSION)

// Linux/Unix/POSIX: read CLOCK_MONOTONIC, which is unaffected by system time changes.
// (CLOCK_REALTIME would be the choice if wall-clock time were needed instead.)
// Returns a zero timepoint if the clock cannot be read.
static inline timepoint_t high_res_now()
{
    constexpr int64_t ns_per_sec = 1000000000LL;

    // Zero-initialised and checked so a failed call can never leak indeterminate stack data
    // into the returned timestamp.
    timespec ts{};
    if(clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
    {
        return timepoint_t{0};
    }

    // Widen the seconds field before multiplying so the product is always computed in 64 bits.
    return timepoint_t{static_cast<int64_t>(ts.tv_sec) * ns_per_sec + ts.tv_nsec};
}

#else

// Fallback for other platforms: calendar time from time(), so the resolution is one second.
// Returns a zero timepoint if time() reports failure.
static inline timepoint_t high_res_now()
{
    constexpr int64_t ns_per_sec = 1000000000LL;

    const time_t now = time(nullptr);
    if(now == static_cast<time_t>(-1))
    {
        return timepoint_t{0};
    }

    return timepoint_t{static_cast<int64_t>(now) * ns_per_sec};
}

#endif

// Duration helpers. Each returns (end - start); the result is negative when `end` precedes
// `start`.

// Elapsed time in nanoseconds.
static inline int64_t duration_ns(timepoint_t start, timepoint_t end)
{
    return end.nanoseconds - start.nanoseconds;
}

// Elapsed time in whole microseconds (integer division, truncated toward zero).
static inline int64_t duration_us(timepoint_t start, timepoint_t end)
{
    return (end.nanoseconds - start.nanoseconds) / 1000LL;
}

// Elapsed time in whole milliseconds (integer division, truncated toward zero).
static inline int64_t duration_ms(timepoint_t start, timepoint_t end)
{
    return (end.nanoseconds - start.nanoseconds) / 1000000LL;
}

// Elapsed time in seconds as a double, keeping the fractional part.
static inline double duration_sec(timepoint_t start, timepoint_t end)
{
    return static_cast<double>(end.nanoseconds - start.nanoseconds) / 1000000000.0;
}

} // namespace ck_tile
|
||||
@@ -5,9 +5,9 @@
|
||||
|
||||
#include "ck_tile/core/config.hpp"
|
||||
#include "ck_tile/host/hip_check_error.hpp"
|
||||
#include "ck_tile/host/high_res_cpu_clock.hpp"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <cstddef>
|
||||
#include <chrono>
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
@@ -54,26 +54,24 @@ struct cpu_timer
|
||||
// Synchronize stream `s` (errors are reported through HIP_CHECK_ERROR), then record the
// current host time as the start timestamp.
CK_TILE_HOST void start(const hipStream_t& s)
{
    HIP_CHECK_ERROR(hipStreamSynchronize(s));
    // Single timestamp source: high_res_now() returns the timepoint_t stored in start_tick.
    start_tick = high_res_now();
}
|
||||
// torch.utils.benchmark.Timer(), there is a sync inside each timer callback
|
||||
// Synchronize stream `s` (errors are reported through HIP_CHECK_ERROR), then record the
// current host time as the stop timestamp.
CK_TILE_HOST void stop(const hipStream_t& s)
{
    HIP_CHECK_ERROR(hipStreamSynchronize(s));
    // Single timestamp source: high_res_now() returns the timepoint_t stored in stop_tick.
    stop_tick = high_res_now();
}
|
||||
// return in ms
|
||||
// Elapsed time between the recorded start and stop timestamps, in milliseconds.
CK_TILE_HOST float duration() const
{
    // Compute in double from the full nanosecond difference and narrow to float only at the
    // end. Going through duration_us() would first truncate to whole microseconds and drop
    // the sub-microsecond part of the measurement.
    const double sec = duration_sec(start_tick, stop_tick);
    return static_cast<float>(sec * 1e3);
}
|
||||
|
||||
private:
|
||||
std::chrono::time_point<std::chrono::high_resolution_clock> start_tick;
|
||||
std::chrono::time_point<std::chrono::high_resolution_clock> stop_tick;
|
||||
timepoint_t start_tick;
|
||||
timepoint_t stop_tick;
|
||||
};
|
||||
|
||||
} // namespace ck_tile
|
||||
|
||||
Reference in New Issue
Block a user