From 2f0ee8ccb1e2236ab50e9568340fa8d70c1a87d6 Mon Sep 17 00:00:00 2001 From: mtgu0705 Date: Tue, 27 May 2025 10:46:43 -0500 Subject: [PATCH] change the gemm1 tile from 64x128x128 to 128x64x128 --- example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp index f531daa24f..0881f74cc2 100644 --- a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp +++ b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp @@ -147,8 +147,8 @@ constexpr ck::index_t ScaleBlockSize = 32; // scaling block constexpr ck::index_t KPerBlock = 256 / DataPackedSize; // 256 f4 = 128 fp4x2 static constexpr ck::index_t Nswizzle = false; static constexpr ck::index_t ActOP = 0; // 0: gelu_and_mul, 1: silu_and_mul -static constexpr ck::index_t MPerBlock = 64; -static constexpr ck::index_t NPerBlock = 128; +static constexpr ck::index_t MPerBlock = 128; +static constexpr ck::index_t NPerBlock = 64; static constexpr ck::index_t BlockSize = 256; static constexpr bool MulRoutedWeight = true; @@ -161,7 +161,7 @@ using DeviceOpInstance = ck::tensor_operation::device::Devic MPerBlock, NPerBlock, KPerBlock, 16, 16, 16, 16, - 2, 4, + 4, 2, S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, 2, 2, S<1, 32, 1, 8>, S<8, 1, 1, 1>,