From be6d9bafa816c9b61b4eea34c014c0c4560af591 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Tue, 31 Mar 2026 05:39:03 +0200 Subject: [PATCH] [CK_TILE] Changed cshuffle LDS descriptor to naive layout (#5729) ## Motivation This PR changes the gemm/convolution cshuffle layout into a plain one to improve cshuffle operation performance. ## Technical Details Before this change, the cshuffle layout applied some descriptor transformations that were probably aimed at reducing LDS bank conflicts, but the transformations themselves were terribly slow, which negatively impacted performance. ## Test Plan No additional tests are needed, since the current tests already cover this functionality. --- .../ck_tile/ops/epilogue/cshuffle_epilogue.hpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/include/ck_tile/ops/epilogue/cshuffle_epilogue.hpp b/include/ck_tile/ops/epilogue/cshuffle_epilogue.hpp index 3639c811fd..5d1ac2fd2f 100644 --- a/include/ck_tile/ops/epilogue/cshuffle_epilogue.hpp +++ b/include/ck_tile/ops/epilogue/cshuffle_epilogue.hpp @@ -336,9 +336,6 @@ struct CShuffleEpilogue constexpr index_t BaseWords = ToWords(BaseStrideElems); constexpr index_t PadWords = ((BaseWords % 2) == 0) ? 1 : 0; constexpr auto PaddingAmount = PadWords * ElemsPer4B; -#else - constexpr auto PaddingAmount = 0; -#endif constexpr auto lds_block_desc_0 = make_naive_tensor_descriptor( make_tuple(number{}, @@ -369,6 +366,18 @@ struct CShuffleEpilogue make_tuple(sequence<0>{}, sequence<1>{})); return lds_block_desc; + +#else + constexpr auto PaddingAmount = 0; + + constexpr auto lds_block_desc = make_naive_tensor_descriptor( + make_tuple(number{}, number{}), + make_tuple(number{}, number<1>{}), + number{}, + number<1>{}); + + return lds_block_desc; +#endif } // M is contiguous dimension else if constexpr(std::is_same_v)