[rocm-libraries] ROCm/rocm-libraries#5729 (commit 516c974)

[CK_TILE] Changed cshuffle LDS descriptor to naive layout
 (#5729)

## Motivation
This PR changes the gemm/convolution cshuffle LDS layout to a plain
(naive) one in order to improve the performance of the cshuffle operation.

## Technical Details
Before this change, the cshuffle layout applied several descriptor
transformations that were probably aimed at reducing LDS bank conflicts.
However, the transformations themselves were terribly slow, which
negatively impacted performance.

## Test Plan
There is no need for additional tests, since current tests cover this
functionality.
This commit is contained in:
jakpiase
2026-03-31 03:40:25 +00:00
committed by assistant-librarian[bot]
parent e6b8094f94
commit 66dc81d530

View File

@@ -336,9 +336,6 @@ struct CShuffleEpilogue
constexpr index_t BaseWords = ToWords(BaseStrideElems);
constexpr index_t PadWords = ((BaseWords % 2) == 0) ? 1 : 0;
constexpr auto PaddingAmount = PadWords * ElemsPer4B;
#else
constexpr auto PaddingAmount = 0;
#endif
constexpr auto lds_block_desc_0 = make_naive_tensor_descriptor(
make_tuple(number<MPerIterationShuffle / MLdsLayer>{},
@@ -369,6 +366,18 @@ struct CShuffleEpilogue
make_tuple(sequence<0>{}, sequence<1>{}));
return lds_block_desc;
#else
constexpr auto PaddingAmount = 0;
constexpr auto lds_block_desc = make_naive_tensor_descriptor(
make_tuple(number<MPerIterationShuffle>{}, number<NPerIterationShuffle>{}),
make_tuple(number<NPerIterationShuffle + PaddingAmount>{}, number<1>{}),
number<VectorLen>{},
number<1>{});
return lds_block_desc;
#endif
}
// M is contiguous dimension
else if constexpr(std::is_same_v<ELayout, tensor_layout::gemm::ColumnMajor>)