From 3800080d25cc64f46169ce4f14bfdfdce3f5146b Mon Sep 17 00:00:00 2001 From: "BingYuan.Zhou" Date: Fri, 14 Nov 2025 09:46:13 +0800 Subject: [PATCH] fix build error (#3195) Co-authored-by: root [ROCm/composable_kernel commit: 4d629cd2b0bb0b4b210881be0db398bcd382f444] --- example/ck_tile/18_flatmm/flatmm_basic.cpp | 2 +- example/ck_tile/18_flatmm/run_flatmm_example.inc | 2 +- .../ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp | 16 ++++++++++------ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/example/ck_tile/18_flatmm/flatmm_basic.cpp b/example/ck_tile/18_flatmm/flatmm_basic.cpp index 9155b27dba..cf05abd51c 100644 --- a/example/ck_tile/18_flatmm/flatmm_basic.cpp +++ b/example/ck_tile/18_flatmm/flatmm_basic.cpp @@ -47,7 +47,7 @@ static constexpr inline auto is_row_major(Layout layout_) // mfma_type, 0:32x32, 1:16x16 template -auto shuffle_b(const ck_tile::HostTensor& t) +auto shuffle_b_v0(const ck_tile::HostTensor& t) { assert(t.get_lengths().size() == 2); int n_ = t.get_lengths()[1]; diff --git a/example/ck_tile/18_flatmm/run_flatmm_example.inc b/example/ck_tile/18_flatmm/run_flatmm_example.inc index 69bf39f670..4063fe284e 100644 --- a/example/ck_tile/18_flatmm/run_flatmm_example.inc +++ b/example/ck_tile/18_flatmm/run_flatmm_example.inc @@ -103,7 +103,7 @@ int run_flatmm_example_with_layouts(int argc, } else { - return shuffle_b(b_origin_host); + return shuffle_b_v0(b_origin_host); } }(); ck_tile::DeviceMem b_shuffle_dev_buf(b_shuffle_host.get_element_space_size_in_bytes()); diff --git a/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp b/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp index a53a4a499e..7523acc080 100644 --- a/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp +++ b/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp @@ -662,17 +662,21 @@ struct FlatmmKernel const auto scale_m_view = make_naive_tensor_view( kargs.scale_m_ptr.ptr, - make_tuple( - kargs.M / ScaleGranularityM, - ScaleGranularityKA == 0 ? 1 : splitk_batch_offset.splitted_k / ScaleGranularityKA), + make_tuple(kargs.M / ScaleGranularityM, + ScaleGranularityKA == 0 + ? 1 + : splitk_batch_offset.splitted_k / + (ScaleGranularityKA != 0 ? ScaleGranularityKA : 1)), make_tuple(scale_stride_m, 0), number < ScaleGranularityM == 1 ? FlatmmPipeline::GetVectorSizeA() : 1 > {}, number<1>{}); const auto scale_n_view = make_naive_tensor_view( kargs.scale_n_ptr.ptr, - make_tuple( - ScaleGranularityKB == 0 ? 1 : (splitk_batch_offset.splitted_k / ScaleGranularityKB), - kargs.N / ScaleGranularityN), + make_tuple(ScaleGranularityKB == 0 + ? 1 + : (splitk_batch_offset.splitted_k / + (ScaleGranularityKB != 0 ? ScaleGranularityKB : 1)), + kargs.N / ScaleGranularityN), make_tuple(0, scale_stride_n), number < ScaleGranularityN == 1 ? FlatmmPipeline::GetVectorSizeB() : 1 > {}, number<1>{});