diff --git a/example/ck_tile/01_fmha/rotary.hpp b/example/ck_tile/01_fmha/rotary.hpp index d4fa3451df..a6458c2173 100644 --- a/example/ck_tile/01_fmha/rotary.hpp +++ b/example/ck_tile/01_fmha/rotary.hpp @@ -8,6 +8,11 @@ #include #include + +#ifndef M_PI // Not there on windows... +#define M_PI 3.141592653589793238462643383279502884 +#endif + #include #include #include diff --git a/example/ck_tile/41_batched_contraction/run_batched_contraction_example.inc b/example/ck_tile/41_batched_contraction/run_batched_contraction_example.inc index 214b14633d..e9e9832002 100644 --- a/example/ck_tile/41_batched_contraction/run_batched_contraction_example.inc +++ b/example/ck_tile/41_batched_contraction/run_batched_contraction_example.inc @@ -6,6 +6,7 @@ #include #include #include +#include #include "contraction_utils.hpp" #include "ck_tile/host/reference/reference_batched_contraction.hpp" diff --git a/include/ck_tile/host/high_res_cpu_clock.hpp b/include/ck_tile/host/high_res_cpu_clock.hpp index c86f7368d4..4a9eb7e0ae 100644 --- a/include/ck_tile/host/high_res_cpu_clock.hpp +++ b/include/ck_tile/host/high_res_cpu_clock.hpp @@ -4,6 +4,16 @@ #pragma once #include +#if defined(_WIN32) || defined(_WIN64) +// Windows +#if !defined(WIN32_LEAN_AND_MEAN) +#define WIN32_LEAN_AND_MEAN +#endif +#if !defined(NOMINMAX) +#define NOMINMAX +#endif +#include +#endif namespace ck_tile { @@ -15,8 +25,6 @@ struct timepoint_t // Platform-specific includes and implementation #if defined(_WIN32) || defined(_WIN64) -// Windows -#include static inline timepoint_t high_res_now() { diff --git a/include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_ex.hpp b/include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_ex.hpp index 46a7acbb12..60245438fe 100644 --- a/include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_ex.hpp +++ b/include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_ex.hpp @@ -109,8 +109,8 @@ struct FusedMoeGemmPipeline_FlatmmEx constexpr auto NEG1 = number<-1>{}; constexpr auto I0 = number<0>{}; constexpr auto I1 = number<1>{}; - constexpr auto TRUE = bool_constant{}; - constexpr auto FALSE = bool_constant{}; + constexpr auto True = bool_constant{}; + constexpr auto False = bool_constant{}; CK_TILE_LDS_ADDR ADataType* smem_0 = reinterpret_cast(smem); CK_TILE_LDS_ADDR ADataType* smem_1 = reinterpret_cast( @@ -291,7 +291,7 @@ struct FusedMoeGemmPipeline_FlatmmEx g_win.bottom_tensor_view_ = u_view; } } - load_tile_raw(g_, g_win, i_access, FALSE, PreNop{}); + load_tile_raw(g_, g_win, i_access, False, PreNop{}); }; auto move_g = [&]() { move_tile_window(g_win, {number<0>{}, number{}, number<0>{}}); @@ -300,7 +300,7 @@ struct FusedMoeGemmPipeline_FlatmmEx auto gld_d = [&]>(auto& d_, auto i_access, PreNop = {}) { - load_tile_raw(d_, d_win, i_access, FALSE, PreNop{}); + load_tile_raw(d_, d_win, i_access, False, PreNop{}); }; auto move_d = [&]() { // d move along gemm-n @@ -309,7 +309,7 @@ struct FusedMoeGemmPipeline_FlatmmEx auto atomic_add_o = [&]>(auto& o_, auto i_access, PreNop = {}) { - update_tile_raw(o_win, o_, i_access, TRUE, PreNop{}); + update_tile_raw(o_win, o_, i_access, True, PreNop{}); }; auto acc_0 = Policy::template MakeCBlockTile_Gemm0(); @@ -502,9 +502,9 @@ struct FusedMoeGemmPipeline_FlatmmEx static_for<0, total_loops, 1>{}([&](auto i_issue) { constexpr auto last_nop = [&]() { if constexpr(i_issue == (total_loops - 1)) - return TRUE; + return True; else - return FALSE; + return False; }(); gemm_0(acc_0, as[I1], gs[I1], i_issue, last_nop); // last gemm has nop }); @@ -607,8 +607,8 @@ struct FusedMoeGemmPipeline_FlatmmEx // start of pipeline // clang-format off - gld_a(a_sst_win0, NEG1, TRUE); - gld_g(gs[I0], NEG1, TRUE); + gld_a(a_sst_win0, NEG1, True); + gld_g(gs[I0], NEG1, True); move_a(); move_g(); clear_tile(acc_0);