mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 13:41:24 +00:00
[rocm-libraries] ROCm/rocm-libraries#5729 (commit 516c974)
[CK_TILE] Changed cshuffle LDS descriptor to naive layout (#5729) ## Motivation This PR changes the gemm/convolution cshuffle layout into a plain one to improve cshuffle operation performance. ## Technical Details Before this change, the cshuffle layout had some descriptor transformations that were probably aimed at reducing LDS bank conflicts, but the transformations themselves were terribly slow, which negatively impacted performance. ## Test Plan No additional tests are needed, since the current tests cover this functionality.
This commit is contained in:
committed by
assistant-librarian[bot]
parent
e6b8094f94
commit
66dc81d530
@@ -336,9 +336,6 @@ struct CShuffleEpilogue
|
||||
constexpr index_t BaseWords = ToWords(BaseStrideElems);
|
||||
constexpr index_t PadWords = ((BaseWords % 2) == 0) ? 1 : 0;
|
||||
constexpr auto PaddingAmount = PadWords * ElemsPer4B;
|
||||
#else
|
||||
constexpr auto PaddingAmount = 0;
|
||||
#endif
|
||||
|
||||
constexpr auto lds_block_desc_0 = make_naive_tensor_descriptor(
|
||||
make_tuple(number<MPerIterationShuffle / MLdsLayer>{},
|
||||
@@ -369,6 +366,18 @@ struct CShuffleEpilogue
|
||||
make_tuple(sequence<0>{}, sequence<1>{}));
|
||||
|
||||
return lds_block_desc;
|
||||
|
||||
#else
|
||||
constexpr auto PaddingAmount = 0;
|
||||
|
||||
constexpr auto lds_block_desc = make_naive_tensor_descriptor(
|
||||
make_tuple(number<MPerIterationShuffle>{}, number<NPerIterationShuffle>{}),
|
||||
make_tuple(number<NPerIterationShuffle + PaddingAmount>{}, number<1>{}),
|
||||
number<VectorLen>{},
|
||||
number<1>{});
|
||||
|
||||
return lds_block_desc;
|
||||
#endif
|
||||
}
|
||||
// M is contiguous dimension
|
||||
else if constexpr(std::is_same_v<ELayout, tensor_layout::gemm::ColumnMajor>)
|
||||
|
||||
Reference in New Issue
Block a user