Support for swizzle and transpose for MFMA_16x16x32_F16/BF16 (#2172)

* Changes for updating tile distribution for shuffle and transpose

* Fixed swizzle and transpose, removed comments

* clang formatted

* Adding support for bf16 type

* Addressing review comments
This commit is contained in:
Khushbu Agarwal
2025-05-10 22:40:05 -07:00
committed by GitHub
parent 6fddb5708c
commit d8faf1c6a1

View File

@@ -77,6 +77,18 @@ using WarpGemmMfmaF16F16F32M16N16K32TransposedCDistribution =
2>>;
#endif
#if defined(__gfx950__)
using WarpGemmMfmaF16F16F32M16N16K32SwizzleBTransposedCDistribution =
WarpGemmImpl<WarpGemmAtrributeMfmaTransposedCDistribution_SwizzleB<
WarpGemmAttributeMfmaImplF16F16F32M16N16K32<WGAttrCtlEnum::Default_>,
1>>;
using WarpGemmMfmaBf16Bf16F32M16N16K32SwizzleBTransposedCDistribution =
WarpGemmImpl<WarpGemmAtrributeMfmaTransposedCDistribution_SwizzleB<
WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K32<WGAttrCtlEnum::Default_>,
1>>;
#endif
#if defined(__gfx950__)
using WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution =
WarpGemmImpl<WarpGemmAtrributeMfmaTransposedCDistribution_SwizzleB<