From 8b594deb00cba91ed6375b905c3965d4ce8299a1 Mon Sep 17 00:00:00 2001 From: Brock Hargreaves Date: Tue, 3 Mar 2026 14:54:08 -0700 Subject: [PATCH] [CK] Address a bunch of errors associated with targeting gfx1200 on Windows (#5045) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Motivation Still addressing errors that are blocking the merge of TheRock PR: https://github.com/ROCm/TheRock/actions/runs/22545831304/job/65308264096?pr=3382 ## Technical Details 1. There are multiple fmha python scripts that are writing native paths which are confusing cmake. I addressed one of these in an earlier PR https://github.com/ROCm/rocm-libraries/pull/4812 and now I'm addressing more that are exposed with gfx1200 target: ``` [composable_kernel configure] CMake Error at example/ck_tile/50_sparse_attn/CMakeLists.txt:61 (add_library): [composable_kernel configure] Syntax error in cmake code when parsing string [composable_kernel configure] [composable_kernel configure] B:\build\ml-libs\composable_kernel\build\example\ck_tile\50_sparse_attn\fmha_jenga_fwd_d128_fp16_batch_b128x128x32x128x32x128_r4x1x1_r4x1x1_w32x32x16_w32x32x16_qr_async_vr_psddv_nlogits_nbias_nmask_nskip_nsquant_ntrload.cpp [composable_kernel configure] [composable_kernel configure] Invalid character escape '\b'. ``` 2. In the following compiler error we see gemm_prec_str being passed as a function to concat(...), instead of being evaluated with the parenthesis operator(), i.e., gemm_prec_str(). 
There are multiple instances of this; I wonder what non-MSVC compilers do here: ``` [composable_kernel] FAILED: [code=1] example/ck_tile/38_block_scale_gemm/CMakeFiles/tile_example_gemm_quant.dir/gemm_bquant_quantgrouped_mx_bf16bf8.cpp.obj [composable_kernel] In file included from E:/TheRock/rocm-libraries/projects/composablekernel/example/ck_tile/38_block_scale_gemm/gemm_bquant_quantgrouped_mx_bf16bf8.cpp:4: [composable_kernel] In file included from E:/TheRock/rocm-libraries/projects/composablekernel/example/ck_tile/38_block_scale_gemm\run_gemm_quant_example.inc:17: [composable_kernel] In file included from E:/TheRock/rocm-libraries/projects/composablekernel/include\ck_tile/host.hpp:7: [composable_kernel] E:/TheRock/rocm-libraries/projects/composablekernel/include\ck_tile/host/concat.hpp:119:21: error: implicit conversion between pointer-to-function and pointer-to-object is a Microsoft extension [-Werror,-Wmicrosoft-cast] [composable_kernel]   119 |     ((oss << sep << rest), ...); [composable_kernel]       |                     ^~~~ [composable_kernel] E:/TheRock/rocm-libraries/projects/composablekernel/include\ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp:248:16: note: in instantiation of function template specialization 'ck_tile::concat (), std::basic_string>' requested here [composable_kernel]   248 |         return concat('_', "gemm_quant", gemm_prec_str, GemmPipeline::GetName()); [composable_kernel]       |                ^ ``` There are plenty of other places where we use gemm_prec_str with the operator(), so I'm pretty sure these were just typos, but I'd like some eyes on it. 3. There are 2 tests that fail to build on Windows, which I've excluded from the build but will open bug tickets for: 1. gemm_weight_preshuffle 2. 
grouped_gemm_preshuffle Here's a sample of the compiler error for these tests: ``` [composable_kernel] [16/19] Building HIP object test/ck_tile/grouped_gemm_preshuffle/CMakeFiles/test_ck_tile_grouped_gemm_preshuffle.dir/test_grouped_gemm_preshuffle.cpp.obj [composable_kernel] FAILED: [code=1] test/ck_tile/grouped_gemm_preshuffle/CMakeFiles/test_ck_tile_grouped_gemm_preshuffle.dir/test_grouped_gemm_preshuffle.cpp.obj [composable_kernel] E:\TheRock\build\core\clr\dist\lib\llvm\bin\clang++.exe -DCK_ENABLE_BF16 -DCK_ENABLE_BF8 -DCK_ENABLE_FP16 -DCK_ENABLE_FP32 -DCK_ENABLE_FP64 -DCK_ENABLE_FP8 -DCK_ENABLE_INT8 -DCK_TILE_USE_WMMA=1 -DCK_TIME_KERNEL=1 -DCK_USE_OCP_FP8 -DCK_USE_WMMA -DCK_USE_WMMA_FP8 -DCK_USE_XDL -DDPP_KERNELS -DUSE_PROF_API=1 -D__HIP_PLATFORM_AMD__=1 -D__HIP_PLATFORM_HCC__=1 -D__HIP_ROCclr__=1 -IE:/TheRock/rocm-libraries/projects/composablekernel/profiler/include -IE:/TheRock/rocm-libraries/projects/composablekernel -IE:/TheRock/rocm-libraries/projects/composablekernel/library/include -IE:/TheRock/rocm-libraries/projects/composablekernel/include -IE:/TheRock/build/ml-libs/composable_kernel/build/include -IE:/TheRock/build/base/half/stage/include -isystem E:/TheRock/build/core/clr/dist/include -isystem E:/TheRock/build/ml-libs/composable_kernel/build/_deps/gtest-src/googletest/include -isystem E:/TheRock/build/ml-libs/composable_kernel/build/_deps/gtest-src/googletest -isystem E:/TheRock/build/ml-libs/composable_kernel/build/_deps/getopt-src/src -O3 -DNDEBUG -std=gnu++20 --offload-arch=gfx1200 -D_DLL -D_MT -Xclang --dependent-lib=msvcrt -Wall -Wextra -Wcomment -Wendif-labels -Wformat -Winit-self -Wreturn-type -Wsequence-point -Wswitch -Wtrigraphs -Wundef -Wuninitialized -Wunreachable-code -Wunused -Wno-reserved-identifier -Wno-option-ignored -Wsign-compare -Wno-extra-semi-stmt -Wno-unused-template -Wno-missing-field-initializers -Wno-error=deprecated-declarations -Wall -Wextra -Wcomment -Wendif-labels -Wformat -Winit-self -Wreturn-type -Wsequence-point 
-Wswitch -Wtrigraphs -Wundef -Wuninitialized -Wunreachable-code -Wunused -Wno-reserved-identifier -Wno-option-ignored -Wsign-compare -Wno-extra-semi-stmt -Wno-unused-template -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-conversion -Wno-double-promotion -Wno-exit-time-destructors -Wno-extra-semi -Wno-float-conversion -Wno-gnu-anonymous-struct -Wno-gnu-zero-variadic-macro-arguments -Wno-missing-prototypes -Wno-nested-anon-types -Wno-padded -Wno-return-std-move-in-c++11 -Wno-shorten-64-to-32 -Wno-sign-conversion -Wno-unknown-warning-option -Wno-unused-command-line-argument -Wno-weak-vtables -Wno-covered-switch-default -Wno-unsafe-buffer-usage -Wno-unused-lambda-capture -Wno-nvcc-compat -Wno-c++20-compat -Wno-bit-int-extension -Wno-pass-failed -Wno-switch-default -Wno-unique-object-duplication -fbracket-depth=1024 -Wno-nrvo -Werror -Weverything -fcolor-diagnostics -Wno-c++20-extensions -Wno-global-constructors -Wno-undef -DCK_TILE_USE_OCP_FP8 -MD -MT test/ck_tile/grouped_gemm_preshuffle/CMakeFiles/test_ck_tile_grouped_gemm_preshuffle.dir/test_grouped_gemm_preshuffle.cpp.obj -MF test\ck_tile\grouped_gemm_preshuffle\CMakeFiles\test_ck_tile_grouped_gemm_preshuffle.dir\test_grouped_gemm_preshuffle.cpp.obj.d -o test/ck_tile/grouped_gemm_preshuffle/CMakeFiles/test_ck_tile_grouped_gemm_preshuffle.dir/test_grouped_gemm_preshuffle.cpp.obj -x hip -c E:/TheRock/rocm-libraries/projects/composablekernel/test/ck_tile/grouped_gemm_preshuffle/test_grouped_gemm_preshuffle.cpp [composable_kernel] In file included from E:/TheRock/rocm-libraries/projects/composablekernel/test/ck_tile/grouped_gemm_preshuffle/test_grouped_gemm_preshuffle.cpp:8: [composable_kernel] In file included from E:/TheRock/rocm-libraries/projects/composablekernel/include\ck_tile/host.hpp:6: [composable_kernel] In file included from E:/TheRock/rocm-libraries/projects/composablekernel/include\ck_tile/host/check_err.hpp:16: [composable_kernel] In file included from 
E:/TheRock/rocm-libraries/projects/composablekernel/include\ck_tile/core.hpp:89: [composable_kernel] E:/TheRock/rocm-libraries/projects/composablekernel/include\ck_tile/core/utility/env.hpp:110:31: warning: 'getenv' is deprecated: This function or variable may be unsafe. Consider using _dupenv_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. [-Wdeprecated-declarations] [composable_kernel] 110 | const char* vp = std::getenv(name); [composable_kernel] | ^ [composable_kernel] C:\Program Files (x86)\Windows Kits\10\include\10.0.22621.0\ucrt\stdlib.h:1183:20: note: 'getenv' has been explicitly marked deprecated here [composable_kernel] 1183 | _Check_return_ _CRT_INSECURE_DEPRECATE(_dupenv_s) [composable_kernel] | ^ [composable_kernel] C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Tools\MSVC\14.44.35207\include\vcruntime.h:368:55: note: expanded from macro '_CRT_INSECURE_DEPRECATE' [composable_kernel] 368 | #define _CRT_INSECURE_DEPRECATE(_Replacement) _CRT_DEPRECATE_TEXT( \ [composable_kernel] | ^ [composable_kernel] C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Tools\MSVC\14.44.35207\include\vcruntime.h:358:47: note: expanded from macro '_CRT_DEPRECATE_TEXT' [composable_kernel] 358 | #define _CRT_DEPRECATE_TEXT(_Text) __declspec(deprecated(_Text)) [composable_kernel] | ^ [composable_kernel] clang++: error: clang frontend command failed due to signal (use -v to see invocation) [composable_kernel] AMD clang version 22.0.0git (https://github.com/ROCm/llvm-project.git a2dc42b87c63e686377a69f09ea23aec7550babc+PATCHED:e4d5bf498b7b8626bb9716f1f5a5946d45025918) [composable_kernel] Target: x86_64-pc-windows-msvc [composable_kernel] Thread model: posix [composable_kernel] InstalledDir: E:\TheRock\build\core\clr\dist\lib\llvm\bin [composable_kernel] clang++: note: diagnostic msg: Error generating preprocessed source(s). [composable_kernel] ninja: build stopped: subcommand failed. 
[composable_kernel FAILED WITH CODE 1 in 238 seconds] ninja: build stopped: subcommand failed. ``` ## Test Plan Wait for internal CI and make sure build compiles locally. ## Test Result Waiting on CI ## Submission Checklist - [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests. --- example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py | 4 ++-- example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py | 8 ++++---- example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py | 4 ++-- example/ck_tile/01_fmha/codegen/ops/fmha_fwd_splitkv.py | 6 +++--- .../ck_tile/01_fmha/codegen/ops/fmha_pagedkv_prefill.py | 4 ++-- example/ck_tile/02_layernorm2d/generate.py | 6 +++--- example/ck_tile/10_rmsnorm2d/generate.py | 6 +++--- .../38_block_scale_gemm/run_gemm_quant_example.inc | 1 + .../ck_tile/50_sparse_attn/codegen/ops/fmha_fwd_jenga.py | 4 ++-- .../ck_tile/50_sparse_attn/codegen/ops/fmha_fwd_vsa.py | 4 ++-- include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp | 2 +- .../ck_tile/ops/flatmm/kernel/grouped_flatmm_kernel.hpp | 6 ++++-- .../ops/flatmm/kernel/mixed_prec_flatmm_kernel.hpp | 2 +- include/ck_tile/ops/flatmm/kernel/moe_flatmm_kernel.hpp | 2 +- include/ck_tile/ops/flatmm/kernel/mx_flatmm_kernel.hpp | 2 +- .../ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp | 2 +- test/ck_tile/gemm_weight_preshuffle/CMakeLists.txt | 7 ++++--- test/ck_tile/grouped_gemm_preshuffle/CMakeLists.txt | 6 ++++-- test/ck_tile/layernorm2d/generate.py | 6 +++--- test/ck_tile/rmsnorm2d/generate.py | 6 +++--- 20 files changed, 47 insertions(+), 41 deletions(-) diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py b/example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py index 8459f20d35..f172bb6ab6 100644 --- a/example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py +++ b/example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py @@ -845,5 +845,5 @@ def list_blobs( with file_path.open("a") as f: _, kernels = 
get_fwd_blobs(kernel_filter, receipt, optdim_list, mask_impl) for kernel in kernels: - f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n") - f.write(str(file_path.parent / GEN_DIR / FMHA_FWD_API_FILENAME) + "\n") + f.write((file_path.parent / GEN_DIR / kernel.filename).as_posix() + "\n") + f.write((file_path.parent / GEN_DIR / FMHA_FWD_API_FILENAME).as_posix() + "\n") diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py b/example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py index 39950d9a33..9be811cacd 100644 --- a/example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py +++ b/example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py @@ -1195,9 +1195,9 @@ def list_blobs( ) with file_path.open("a") as f: for k in kernels_dot_do_o: - f.write(str(file_path.parent / GEN_DIR / k.filename) + "\n") + f.write((file_path.parent / GEN_DIR / k.filename).as_posix() + "\n") for k in kernels_dq_dk_dv: - f.write(str(file_path.parent / GEN_DIR / k.filename) + "\n") + f.write((file_path.parent / GEN_DIR / k.filename).as_posix() + "\n") for k in kernels_convert_dq: - f.write(str(file_path.parent / GEN_DIR / k.filename) + "\n") - f.write(str(file_path.parent / GEN_DIR / FMHA_BWD_API_FILENAME) + "\n") + f.write((file_path.parent / GEN_DIR / k.filename).as_posix() + "\n") + f.write((file_path.parent / GEN_DIR / FMHA_BWD_API_FILENAME).as_posix() + "\n") diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py index c9a6013c40..1d0d04df77 100644 --- a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py +++ b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py @@ -1458,5 +1458,5 @@ def list_blobs( targets, kernel_filter, receipt, optdim_list, mask_impl ) for kernel in kernels: - f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n") - f.write(str(file_path.parent / GEN_DIR / FMHA_FWD_API_FILENAME) + "\n") + f.write((file_path.parent / GEN_DIR / kernel.filename).as_posix() + "\n") + f.write((file_path.parent / GEN_DIR / 
FMHA_FWD_API_FILENAME).as_posix() + "\n") diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_splitkv.py b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_splitkv.py index def90a5429..e0ccde8a6b 100644 --- a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_splitkv.py +++ b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_splitkv.py @@ -1136,10 +1136,10 @@ def list_blobs( targets, filter_list[0], receipt, optdim_list ) for kernel in kernels: - f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n") + f.write((file_path.parent / GEN_DIR / kernel.filename).as_posix() + "\n") kernels = get_fwd_splitkv_blobs( targets, filter_list[1], receipt, mask_impl, optdim_list ) for kernel in kernels: - f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n") - f.write(str(file_path.parent / GEN_DIR / FMHA_FWD_SPLITKV_API_FILENAME) + "\n") + f.write((file_path.parent / GEN_DIR / kernel.filename).as_posix() + "\n") + f.write((file_path.parent / GEN_DIR / FMHA_FWD_SPLITKV_API_FILENAME).as_posix() + "\n") diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_pagedkv_prefill.py b/example/ck_tile/01_fmha/codegen/ops/fmha_pagedkv_prefill.py index 45e5f9c705..1ac1f1c38a 100644 --- a/example/ck_tile/01_fmha/codegen/ops/fmha_pagedkv_prefill.py +++ b/example/ck_tile/01_fmha/codegen/ops/fmha_pagedkv_prefill.py @@ -795,5 +795,5 @@ def list_blobs( targets, kernel_filter, receipt, optdim_list, mask_impl ) for kernel in kernels: - f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n") - f.write(str(file_path.parent / GEN_DIR / FMHA_FWD_API_FILENAME) + "\n") + f.write((file_path.parent / GEN_DIR / kernel.filename).as_posix() + "\n") + f.write((file_path.parent / GEN_DIR / FMHA_FWD_API_FILENAME).as_posix() + "\n") diff --git a/example/ck_tile/02_layernorm2d/generate.py b/example/ck_tile/02_layernorm2d/generate.py index 898e55f7cb..b166acda2a 100644 --- a/example/ck_tile/02_layernorm2d/generate.py +++ b/example/ck_tile/02_layernorm2d/generate.py @@ -1542,11 +1542,11 @@ float 
layernorm2d_fwd(layernorm2d_fwd_traits t, blobs = self.get_blobs(args) with list_p.open("w") as list_f: # api related file - list_f.write(str(w_p / (self.name_api + ".cpp")) + "\n") - list_f.write(str(w_p / (self.name_common_header + ".hpp")) + "\n") + list_f.write((w_p / (self.name_api + ".cpp")).as_posix() + "\n") + list_f.write((w_p / (self.name_common_header + ".hpp")).as_posix() + "\n") # kernel instance file for b in blobs: - list_f.write(str(w_p / (b.name + ".cpp")) + "\n") + list_f.write((w_p / (b.name + ".cpp")).as_posix() + "\n") def gen_blobs(self, args) -> None: w_p = Path(self.working_path) diff --git a/example/ck_tile/10_rmsnorm2d/generate.py b/example/ck_tile/10_rmsnorm2d/generate.py index 7de1d4f54c..f440e7f371 100644 --- a/example/ck_tile/10_rmsnorm2d/generate.py +++ b/example/ck_tile/10_rmsnorm2d/generate.py @@ -2655,11 +2655,11 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t, blobs = self.get_blobs() with list_p.open("w") as list_f: # api related file - list_f.write(str(w_p / (self.name_api + ".cpp")) + "\n") - list_f.write(str(w_p / (self.name_common_header + ".hpp")) + "\n") + list_f.write((w_p / (self.name_api + ".cpp")).as_posix() + "\n") + list_f.write((w_p / (self.name_common_header + ".hpp")).as_posix() + "\n") # kernel instance file for b in blobs: - list_f.write(str(w_p / (b.name + ".cpp")) + "\n") + list_f.write((w_p / (b.name + ".cpp")).as_posix() + "\n") def gen_blobs(self) -> None: w_p = Path(self.working_path) diff --git a/example/ck_tile/38_block_scale_gemm/run_gemm_quant_example.inc b/example/ck_tile/38_block_scale_gemm/run_gemm_quant_example.inc index 85faf8b58b..614e195cbf 100644 --- a/example/ck_tile/38_block_scale_gemm/run_gemm_quant_example.inc +++ b/example/ck_tile/38_block_scale_gemm/run_gemm_quant_example.inc @@ -10,6 +10,7 @@ #include #include #include +#include #include "ck_tile/core/config.hpp" #include "ck_tile/ops/common/utils.hpp" diff --git a/example/ck_tile/50_sparse_attn/codegen/ops/fmha_fwd_jenga.py 
b/example/ck_tile/50_sparse_attn/codegen/ops/fmha_fwd_jenga.py index 7cf64849af..a3d32652a9 100644 --- a/example/ck_tile/50_sparse_attn/codegen/ops/fmha_fwd_jenga.py +++ b/example/ck_tile/50_sparse_attn/codegen/ops/fmha_fwd_jenga.py @@ -863,5 +863,5 @@ def list_blobs( with file_path.open("a") as f: _, kernels = get_fwd_blobs(kernel_filter, receipt, optdim_list, mask_impl) for kernel in kernels: - f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n") - f.write(str(file_path.parent / GEN_DIR / FMHA_FWD_API_FILENAME) + "\n") + f.write((file_path.parent / GEN_DIR / kernel.filename).as_posix() + "\n") + f.write((file_path.parent / GEN_DIR / FMHA_FWD_API_FILENAME).as_posix() + "\n") diff --git a/example/ck_tile/50_sparse_attn/codegen/ops/fmha_fwd_vsa.py b/example/ck_tile/50_sparse_attn/codegen/ops/fmha_fwd_vsa.py index 11b3fa743c..038738de24 100644 --- a/example/ck_tile/50_sparse_attn/codegen/ops/fmha_fwd_vsa.py +++ b/example/ck_tile/50_sparse_attn/codegen/ops/fmha_fwd_vsa.py @@ -863,5 +863,5 @@ def list_blobs( with file_path.open("a") as f: _, kernels = get_fwd_blobs(kernel_filter, receipt, optdim_list, mask_impl) for kernel in kernels: - f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n") - f.write(str(file_path.parent / GEN_DIR / FMHA_FWD_API_FILENAME) + "\n") + f.write((file_path.parent / GEN_DIR / kernel.filename).as_posix() + "\n") + f.write((file_path.parent / GEN_DIR / FMHA_FWD_API_FILENAME).as_posix() + "\n") diff --git a/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp b/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp index b2b36adb1e..bd98918b90 100644 --- a/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp +++ b/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp @@ -282,7 +282,7 @@ struct FlatmmKernel [[nodiscard]] CK_TILE_HOST static const std::string GetName() { // clang-format off - return concat('_', "gemm", gemm_prec_str, FlatmmPipeline::GetName()); + return concat('_', "gemm", gemm_prec_str(), 
FlatmmPipeline::GetName()); // clang-format on } diff --git a/include/ck_tile/ops/flatmm/kernel/grouped_flatmm_kernel.hpp b/include/ck_tile/ops/flatmm/kernel/grouped_flatmm_kernel.hpp index 58d053d5ae..ae33137459 100644 --- a/include/ck_tile/ops/flatmm/kernel/grouped_flatmm_kernel.hpp +++ b/include/ck_tile/ops/flatmm/kernel/grouped_flatmm_kernel.hpp @@ -227,8 +227,10 @@ struct GroupedFlatmmKernel : FlatmmKernel, FlatmmPipeline::GetName()); + return concat('_', + "grouped_flatmm", + gemm_prec_str(), + FlatmmPipeline::GetName()); } template , diff --git a/include/ck_tile/ops/flatmm/kernel/mixed_prec_flatmm_kernel.hpp b/include/ck_tile/ops/flatmm/kernel/mixed_prec_flatmm_kernel.hpp index 61001522b0..d27f3c79a3 100644 --- a/include/ck_tile/ops/flatmm/kernel/mixed_prec_flatmm_kernel.hpp +++ b/include/ck_tile/ops/flatmm/kernel/mixed_prec_flatmm_kernel.hpp @@ -54,7 +54,7 @@ struct F16xMXF4FlatmmKernel : FlatmmKernel, FlatmmPipeline::GetName()); + return concat('_', "mixed_prec_gemm", gemm_prec_str(), FlatmmPipeline::GetName()); // clang-format on } diff --git a/include/ck_tile/ops/flatmm/kernel/moe_flatmm_kernel.hpp b/include/ck_tile/ops/flatmm/kernel/moe_flatmm_kernel.hpp index a211d3b88e..13d5e65155 100644 --- a/include/ck_tile/ops/flatmm/kernel/moe_flatmm_kernel.hpp +++ b/include/ck_tile/ops/flatmm/kernel/moe_flatmm_kernel.hpp @@ -326,7 +326,7 @@ struct MoeFlatmmKernel [[nodiscard]] CK_TILE_HOST static const std::string GetName() { return concat( - '_', "moe_flatmm", gemm_prec_str, FlatmmPipeline::GetName()); + '_', "moe_flatmm", gemm_prec_str(), FlatmmPipeline::GetName()); } static constexpr auto BlockSize() -> dim3 { return dim3(kBlockSize); } diff --git a/include/ck_tile/ops/flatmm/kernel/mx_flatmm_kernel.hpp b/include/ck_tile/ops/flatmm/kernel/mx_flatmm_kernel.hpp index a58d71c790..406e094b50 100644 --- a/include/ck_tile/ops/flatmm/kernel/mx_flatmm_kernel.hpp +++ b/include/ck_tile/ops/flatmm/kernel/mx_flatmm_kernel.hpp @@ -63,7 +63,7 @@ struct MXFlatmmKernel : 
FlatmmKernel, MXFlatmmPipeline::GetName()); + return concat('_', "mx_flatmm_gemm", gemm_prec_str(), MXFlatmmPipeline::GetName()); // clang-format on } diff --git a/include/ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp b/include/ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp index 7507ff58cc..486c1836ea 100644 --- a/include/ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp +++ b/include/ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp @@ -245,7 +245,7 @@ struct QuantGemmKernel [[nodiscard]] CK_TILE_HOST static const std::string GetName() { // clang-format off - return concat('_', "gemm_quant", gemm_prec_str, GemmPipeline::GetName()); + return concat('_', "gemm_quant", gemm_prec_str(), GemmPipeline::GetName()); // clang-format on } diff --git a/test/ck_tile/gemm_weight_preshuffle/CMakeLists.txt b/test/ck_tile/gemm_weight_preshuffle/CMakeLists.txt index 86db48335d..afed9d479b 100644 --- a/test/ck_tile/gemm_weight_preshuffle/CMakeLists.txt +++ b/test/ck_tile/gemm_weight_preshuffle/CMakeLists.txt @@ -16,9 +16,10 @@ list(APPEND EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS ) if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx11|gfx12") - add_gtest_executable(test_ck_tile_gemm_pipeline_wp test_gemm_pipeline_wp.cpp) - - target_compile_options(test_ck_tile_gemm_pipeline_wp PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS}) + if(NOT WIN32) #TODO: Figure out why this fails + add_gtest_executable(test_ck_tile_gemm_pipeline_wp test_gemm_pipeline_wp.cpp) + target_compile_options(test_ck_tile_gemm_pipeline_wp PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS}) + endif() else() message(DEBUG "Skipping ck_tile_gemm tests for current target") endif() diff --git a/test/ck_tile/grouped_gemm_preshuffle/CMakeLists.txt b/test/ck_tile/grouped_gemm_preshuffle/CMakeLists.txt index 3a230aed0c..0a7f6ac2a7 100644 --- a/test/ck_tile/grouped_gemm_preshuffle/CMakeLists.txt +++ b/test/ck_tile/grouped_gemm_preshuffle/CMakeLists.txt @@ -7,6 +7,8 @@ if(CK_USE_OCP_FP8) endif() if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx12") - 
add_gtest_executable(test_ck_tile_grouped_gemm_preshuffle test_grouped_gemm_preshuffle.cpp) - target_compile_options(test_ck_tile_grouped_gemm_preshuffle PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS}) + if(NOT WIN32) #TODO: Figure out why this fails + add_gtest_executable(test_ck_tile_grouped_gemm_preshuffle test_grouped_gemm_preshuffle.cpp) + target_compile_options(test_ck_tile_grouped_gemm_preshuffle PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS}) + endif() endif() diff --git a/test/ck_tile/layernorm2d/generate.py b/test/ck_tile/layernorm2d/generate.py index f56cf8f4ab..0fb0c2cd4e 100644 --- a/test/ck_tile/layernorm2d/generate.py +++ b/test/ck_tile/layernorm2d/generate.py @@ -1507,11 +1507,11 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t, blobs = self.get_blobs(args) with list_p.open("w") as list_f: # api related file - list_f.write(str(w_p / (self.name_api + ".cpp")) + "\n") - list_f.write(str(w_p / (self.name_common_header + ".hpp")) + "\n") + list_f.write((w_p / (self.name_api + ".cpp")).as_posix() + "\n") + list_f.write((w_p / (self.name_common_header + ".hpp")).as_posix() + "\n") # kernel instance file for b in blobs: - list_f.write(str(w_p / (b.name + ".cpp")) + "\n") + list_f.write((w_p / (b.name + ".cpp")).as_posix() + "\n") def gen_blobs(self, args) -> None: w_p = Path(self.working_path) diff --git a/test/ck_tile/rmsnorm2d/generate.py b/test/ck_tile/rmsnorm2d/generate.py index 09c8edac70..37893dbcba 100644 --- a/test/ck_tile/rmsnorm2d/generate.py +++ b/test/ck_tile/rmsnorm2d/generate.py @@ -1509,11 +1509,11 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t, blobs = self.get_blobs() with list_p.open("w") as list_f: # api related file - list_f.write(str(w_p / (self.name_api + ".cpp")) + "\n") - list_f.write(str(w_p / (self.name_common_header + ".hpp")) + "\n") + list_f.write((w_p / (self.name_api + ".cpp")).as_posix() + "\n") + list_f.write((w_p / (self.name_common_header + ".hpp")).as_posix() + "\n") # kernel instance file for b in blobs: - list_f.write(str(w_p / 
(b.name + ".cpp")) + "\n") + list_f.write((w_p / (b.name + ".cpp")).as_posix() + "\n") def gen_blobs(self) -> None: w_p = Path(self.working_path)