diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp index 5337fd5e2c..81aedf8e80 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp @@ -1576,7 +1576,7 @@ struct GridwiseMoeGemm static_for<0, EMRepeats, 1>{}([&](auto m0) { const index_t fused_token = p_sorted_token_ids[c_token_pos + m0]; index_t token_offset = fused_token & 0xffffff; - float weight = p_sorted_weights_0[token_offset * problem.StrideDs[0]]; + float weight = token_offset < problem.NumTokens? p_sorted_weights_0[token_offset * problem.StrideDs[0]] : 1.0; if constexpr(IsInputGemm) { token_offset = token_offset * problem.TopK + (fused_token >> 24);