From ffc41f64fd74f1a5fc13be59a513b004055b0d92 Mon Sep 17 00:00:00 2001 From: lalala-sh Date: Tue, 29 Apr 2025 00:49:31 +0800 Subject: [PATCH] fix moe i4 example bug (#2139) [ROCm/composable_kernel commit: 83394e40d2452d32701bed4ed85bea1bfa50cfc2] --- example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp index 3c3ef16198..9e80a2ca35 100644 --- a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp +++ b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp @@ -233,7 +233,7 @@ int main(int argc, char* argv[]) ck::index_t StrideB = K; ck::index_t StrideE = N; constexpr ck::index_t NumDTensor = DsDataType::Size(); - constexpr auto StrideDs = std::array{0, 0, 0}; + constexpr auto StrideDs = std::array{1, 1, 1}; ck::index_t KBatch = 1; @@ -266,7 +266,8 @@ int main(int argc, char* argv[]) Tensor b0_e_n_k(HostTensorDescriptor({experts, K, N * 2}, {N * 2 * K, 1, K})); Tensor b0_preshuffled(HostTensorDescriptor({experts, K, N * 2}, {N * 2 * K, 1, K})); Tensor d0_t_n(HostTensorDescriptor({tokens, N}, {StrideDs[0], 0})); - Tensor d1_e_n(HostTensorDescriptor({experts, N * 2}, {1, StrideDs[1]})); + Tensor d1_e_n( + HostTensorDescriptor({experts, N * 2}, {StrideDs[1] * N * 2, StrideDs[1]})); Tensor d2_e_n(HostTensorDescriptor({sorted_size, N}, {1, 0})); Tensor e_t_n_host_result(HostTensorDescriptor({tokens, topk, N}, {topk * N, N, 1})); Tensor e_t_n_device_result(