[CK_TILE] Use read_tr in universal gemm (#2436)

* Use read_tr in universal gemm

* Enable all instances back

* Revert example37 changes

* Resolve comments

* Resolve comments 2

* Fix assertion msg

* Fix the gemm basic example

* Change index_t to bool for the preshuffle variable

* Resolve the comment

---------

Co-authored-by: Thomas Ning <Thomas.Ning@amd.com>
Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>
Co-authored-by: Max Podkorytov <4273004+tenpercent@users.noreply.github.com>
Co-authored-by: AviralGoelAMD <aviral.goel@amd.com>

[ROCm/composable_kernel commit: f1d8ad2818]
This commit is contained in:
Yi DING
2025-07-17 14:56:22 +08:00
committed by GitHub
parent ac0a7f44e1
commit 3bb687d502
26 changed files with 1150 additions and 565 deletions

View File

@@ -23,7 +23,7 @@ args:
-n n dimension (default:2048)
-k k dimension (default:64)
-a_layout Tensor A data layout (default: R)
-b_layout Tensor B data layout (default: R)
-b_layout Tensor B data layout (default: C)
-c_layout Tensor C data layout (default: R)
-stride_a Tensor A stride (default:0)
-stride_b Tensor B stride (default:0)

View File

@@ -48,8 +48,8 @@ struct TransposePolicy
constexpr auto input_dstr = MakeLdsLoadTileDistribution<Problem>();
using OutTileDstrEncode =
typename OutputTileDistributionTraits<remove_cvref_t<decltype(input_dstr)>,
typename Problem::DataType>::OutDstrEncode;
typename OutputTileDistributionTraits<typename decltype(input_dstr)::DstrEncode,
typename Problem::DataType>::TransposedDstrEncode;
constexpr auto block_dstr = make_static_tile_distribution(OutTileDstrEncode{});
return block_dstr;
@@ -131,7 +131,9 @@ struct TransposePolicy
constexpr index_t kSecondDimIterations = Problem::kIterationsInSecondDim;
constexpr index_t kSecondDimStrSub = kSecondRepetitions / kSecondDimIterations;
constexpr index_t kLaneGroupSize = 16;
constexpr auto xdllevel_dstr_encoding = make_transposed_distr_encode<DataType,
kLaneGroupSize,
kSecondDimStrSub,
kSecondDimIterations,
kLeadRepetitions,