[CK_TILE] Use read_tr in universal gemm (#2436)

* Use read_tr in universal gemm

* Re-enable all instances

* Revert example37 changes

* Resolve comments

* Resolve comments (round 2)

* Fix assertion msg

* Fix the basic gemm

* Change index_t to bool for the preshuffle variable

* Address the review comment

---------

Co-authored-by: Thomas Ning <Thomas.Ning@amd.com>
Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>
Co-authored-by: Max Podkorytov <4273004+tenpercent@users.noreply.github.com>
Co-authored-by: AviralGoelAMD <aviral.goel@amd.com>
This commit is contained in:
Yi DING
2025-07-17 14:56:22 +08:00
committed by GitHub
parent 579bd73435
commit f1d8ad2818
26 changed files with 1150 additions and 565 deletions

View File

@@ -48,8 +48,8 @@ struct TransposePolicy
constexpr auto input_dstr = MakeLdsLoadTileDistribution<Problem>();
using OutTileDstrEncode =
typename OutputTileDistributionTraits<remove_cvref_t<decltype(input_dstr)>,
typename Problem::DataType>::OutDstrEncode;
typename OutputTileDistributionTraits<typename decltype(input_dstr)::DstrEncode,
typename Problem::DataType>::TransposedDstrEncode;
constexpr auto block_dstr = make_static_tile_distribution(OutTileDstrEncode{});
return block_dstr;
@@ -131,7 +131,9 @@ struct TransposePolicy
constexpr index_t kSecondDimIterations = Problem::kIterationsInSecondDim;
constexpr index_t kSecondDimStrSub = kSecondRepetitions / kSecondDimIterations;
constexpr index_t kLaneGroupSize = 16;
constexpr auto xdllevel_dstr_encoding = make_transposed_distr_encode<DataType,
kLaneGroupSize,
kSecondDimStrSub,
kSecondDimIterations,
kLeadRepetitions,