From 1927528b44a17a520f6723e14a1169054e89344d Mon Sep 17 00:00:00 2001 From: Brock Hargreaves Date: Wed, 25 Feb 2026 16:12:19 +0000 Subject: [PATCH] [CK] Fix windows build issues (#4819) ## Motivation The full build on Windows is currently broken due to compiler errors; this PR should help fix that. This is also holding up the following PR in TheRock: https://github.com/ROCm/TheRock/pull/3382 ## Technical Details 1. I don't see a good reason to be nesting a Windows include inside the ck_tile namespace. It was causing compiler errors too: Windows.h comes with min and max macros, which were conflicting with ck_tile::min and ck_tile::max, so I moved the include out of the namespace. I also defined NOMINMAX so that Windows.h no longer defines those macros. 2. The TRUE/FALSE macros are already defined by Windows.h, which causes an error. So I've opted for True/False. You can see this pattern in other rocm-libraries. 3. The M_PI macro isn't available, at least in the WIN32_LEAN_AND_MEAN context, from <cmath> on Windows. We'll be able to use std::numbers::pi_v<T> when we have C++20 support. 4. There was a missing #include in run_batched_contraction_example.inc. ## Test Plan Test locally and make sure this doesn't impact existing CI. ## Test Result Compiles locally and passes existing CI. ## Submission Checklist - [ x ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests. --- example/ck_tile/01_fmha/rotary.hpp | 5 +++++ .../run_batched_contraction_example.inc | 1 + include/ck_tile/host/high_res_cpu_clock.hpp | 12 ++++++++++-- .../fused_moegemm_pipeline_flatmm_ex.hpp | 18 +++++++++--------- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/example/ck_tile/01_fmha/rotary.hpp b/example/ck_tile/01_fmha/rotary.hpp index d4fa3451df..a6458c2173 100644 --- a/example/ck_tile/01_fmha/rotary.hpp +++ b/example/ck_tile/01_fmha/rotary.hpp @@ -8,6 +8,11 @@ #include #include + +#ifndef M_PI // Not there on windows... 
+#define M_PI 3.141592653589793238462643383279502884 +#endif + #include #include #include diff --git a/example/ck_tile/41_batched_contraction/run_batched_contraction_example.inc b/example/ck_tile/41_batched_contraction/run_batched_contraction_example.inc index 214b14633d..e9e9832002 100644 --- a/example/ck_tile/41_batched_contraction/run_batched_contraction_example.inc +++ b/example/ck_tile/41_batched_contraction/run_batched_contraction_example.inc @@ -6,6 +6,7 @@ #include #include #include +#include #include "contraction_utils.hpp" #include "ck_tile/host/reference/reference_batched_contraction.hpp" diff --git a/include/ck_tile/host/high_res_cpu_clock.hpp b/include/ck_tile/host/high_res_cpu_clock.hpp index c86f7368d4..4a9eb7e0ae 100644 --- a/include/ck_tile/host/high_res_cpu_clock.hpp +++ b/include/ck_tile/host/high_res_cpu_clock.hpp @@ -4,6 +4,16 @@ #pragma once #include +#if defined(_WIN32) || defined(_WIN64) +// Windows +#if !defined(WIN32_LEAN_AND_MEAN) +#define WIN32_LEAN_AND_MEAN +#endif +#if !defined(NOMINMAX) +#define NOMINMAX +#endif +#include +#endif namespace ck_tile { @@ -15,8 +25,6 @@ struct timepoint_t // Platform-specific includes and implementation #if defined(_WIN32) || defined(_WIN64) -// Windows -#include static inline timepoint_t high_res_now() { diff --git a/include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_ex.hpp b/include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_ex.hpp index 46a7acbb12..60245438fe 100644 --- a/include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_ex.hpp +++ b/include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_ex.hpp @@ -109,8 +109,8 @@ struct FusedMoeGemmPipeline_FlatmmEx constexpr auto NEG1 = number<-1>{}; constexpr auto I0 = number<0>{}; constexpr auto I1 = number<1>{}; - constexpr auto TRUE = bool_constant{}; - constexpr auto FALSE = bool_constant{}; + constexpr auto True = bool_constant{}; + constexpr auto False = bool_constant{}; CK_TILE_LDS_ADDR 
ADataType* smem_0 = reinterpret_cast(smem); CK_TILE_LDS_ADDR ADataType* smem_1 = reinterpret_cast( @@ -291,7 +291,7 @@ struct FusedMoeGemmPipeline_FlatmmEx g_win.bottom_tensor_view_ = u_view; } } - load_tile_raw(g_, g_win, i_access, FALSE, PreNop{}); + load_tile_raw(g_, g_win, i_access, False, PreNop{}); }; auto move_g = [&]() { move_tile_window(g_win, {number<0>{}, number{}, number<0>{}}); @@ -300,7 +300,7 @@ struct FusedMoeGemmPipeline_FlatmmEx auto gld_d = [&]>(auto& d_, auto i_access, PreNop = {}) { - load_tile_raw(d_, d_win, i_access, FALSE, PreNop{}); + load_tile_raw(d_, d_win, i_access, False, PreNop{}); }; auto move_d = [&]() { // d move along gemm-n @@ -309,7 +309,7 @@ struct FusedMoeGemmPipeline_FlatmmEx auto atomic_add_o = [&]>(auto& o_, auto i_access, PreNop = {}) { - update_tile_raw(o_win, o_, i_access, TRUE, PreNop{}); + update_tile_raw(o_win, o_, i_access, True, PreNop{}); }; auto acc_0 = Policy::template MakeCBlockTile_Gemm0(); @@ -502,9 +502,9 @@ struct FusedMoeGemmPipeline_FlatmmEx static_for<0, total_loops, 1>{}([&](auto i_issue) { constexpr auto last_nop = [&]() { if constexpr(i_issue == (total_loops - 1)) - return TRUE; + return True; else - return FALSE; + return False; }(); gemm_0(acc_0, as[I1], gs[I1], i_issue, last_nop); // last gemm has nop }); @@ -607,8 +607,8 @@ struct FusedMoeGemmPipeline_FlatmmEx // start of pipeline // clang-format off - gld_a(a_sst_win0, NEG1, TRUE); - gld_g(gs[I0], NEG1, TRUE); + gld_a(a_sst_win0, NEG1, True); + gld_g(gs[I0], NEG1, True); move_a(); move_g(); clear_tile(acc_0);