[CK_TILE] FMHA Support hdim_v to as a Multiple of 32 (#2114)

* 160+192 * Add splitkv d160 * cleanup * fix * Add change log * Fix CHANGELOG * Use static_cast * Update ignored instance --------- Co-authored-by: asleepzzz <hanwen.chang@amd.com> [ROCm/composable_kernel commit: b8212864cf]
2026-05-20 12:59:49 +00:00 · 2025-06-24 01:33:31 +08:00
parent 470608bdcb
commit cba904aeff
7 changed files with 89 additions and 68 deletions
--- a/include/ck_tile/core/tensor/shuffle_tile.hpp
+++ b/include/ck_tile/core/tensor/shuffle_tile.hpp
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.

 #pragma once

@@ -129,7 +129,10 @@ CK_TILE_DEVICE void shuffle_tile_impl_in_thread(OutTensor& out_tensor, const InT
        // set output vectors
        static_for<0, num_vec_out, 1>{}([&](auto i) {
            constexpr auto idx_y_out_tmp = generate_array(
-                [&](auto ii) { return ii == y_dim_vec_in ? idx_y_start[ii] + i : idx_y_start[ii]; },
+                [&](auto ii) {
+                    return ii == y_dim_vec_in ? static_cast<index_t>(idx_y_start[ii]) + i
+                                              : static_cast<index_t>(idx_y_start[ii]);
+                },
                number<NDimY>{});

            constexpr auto idx_y_out =