From 8108567dc1d2597477fc263fdf415d3c0199e441 Mon Sep 17 00:00:00 2001
From: Khushbu Agarwal
Date: Sat, 10 May 2025 22:40:05 -0700
Subject: [PATCH] Support for swizzle and transpose for MFMA_16x16x32_F16/BF16 (#2172)

* Changes for updating tile distribution for shuffle and transpose
* Fixed swizzle and transpose, removed comments
* clang formatted
* Adding support for bf16 type
* Addressing review comments

[ROCm/composable_kernel commit: d8faf1c6a161ddcee98e9dfca3cc00941eec9f61]
---
 include/ck_tile/ops/gemm/warp/warp_gemm.hpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/ck_tile/ops/gemm/warp/warp_gemm.hpp b/include/ck_tile/ops/gemm/warp/warp_gemm.hpp
index 5cc5ddc70e..5ed97dc05c 100644
--- a/include/ck_tile/ops/gemm/warp/warp_gemm.hpp
+++ b/include/ck_tile/ops/gemm/warp/warp_gemm.hpp
@@ -77,6 +77,18 @@ using WarpGemmMfmaF16F16F32M16N16K32TransposedCDistribution =
         2>>;
 #endif
 
+#if defined(__gfx950__)
+using WarpGemmMfmaF16F16F32M16N16K32SwizzleBTransposedCDistribution =
+    WarpGemmImpl,
+        1>>;
+
+using WarpGemmMfmaBf16Bf16F32M16N16K32SwizzleBTransposedCDistribution =
+    WarpGemmImpl,
+        1>>;
+#endif
+
 #if defined(__gfx950__)
 using WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution =
     WarpGemmImpl