From 563f970e102cdcef50241197fa3474c8e85f0d74 Mon Sep 17 00:00:00 2001 From: lirui927 Date: Wed, 24 Jun 2026 17:34:07 +0800 Subject: [PATCH] Support OAI SwiGLU in MoE epilogue --- .../ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp index c9a4c8bc5a..672be4e9b8 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp @@ -278,17 +278,19 @@ struct GridwiseMoeGemm : public GridwiseGemm_xdl_cshuffle_base< using Base::NumDTensor; static constexpr auto BlockSizeNumber = Number{}; - // Clamp limit for swiglustep_and_mul: silu(g).clamp(max=L) * u.clamp(+-L), L hardcoded to 7.0 + // MiniMax-M3 / GPT-OSS OAI SwiGLU: clamp first, then + // gate * sigmoid(alpha * gate) * (up + beta). Values match model config. static constexpr float kSwiGluClamp = 7.0f; + static constexpr float kSwiGluAlpha = 1.702f; + static constexpr float kSwiGluBeta = 1.0f; - // Helper: apply SwiGLU-step activation (silu + symmetric clamp) and return gate*up. // Used by all four swiglustep_and_mul epilogue paths (quant/non-quant x pipeline-A/B). __host__ __device__ static constexpr float apply_swiglustep_activation(float gate, float up) { - tensor_operation::element_wise::Silu{}(gate, gate); gate = math::min(gate, kSwiGluClamp); up = math::min(math::max(up, -kSwiGluClamp), kSwiGluClamp); - return gate * up; + const float sigmoid = 1.0f / (1.0f + math::exp(-kSwiGluAlpha * gate)); + return gate * sigmoid * (up + kSwiGluBeta); } using mfma_selector = MfmaSelector;