fix a16w4 moe bugs (#3373)

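* fix valid mask bug: compute c_scatter_valids from the raw token id (before the IsInputGemm TopK expansion) so it is compared against NumTokens correctly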

* update format

[ROCm/composable_kernel commit: 6f0966e1e9]
This commit is contained in:
lalala-sh
2025-12-09 17:54:55 +08:00
committed by GitHub
parent b726f9606c
commit 77f9a0a615

View File

@@ -1259,12 +1259,12 @@ struct MoeFlatmmKernel
auto fused_token =
kargs.p_sorted_token_ids[row_idx]; // topk-idx[31:24] + token_idx[23:0]
index_t scatter_token_id = fused_token & token_id_mask;
index_t scatter_token_id = fused_token & token_id_mask;
c_scatter_valids[mIter][m0] = (scatter_token_id < kargs.NumTokens);
if constexpr(IsInputGemm)
scatter_token_id =
scatter_token_id * kargs.TopK + (fused_token >> token_id_offset);
c_scatter_offsets[mIter][m0] = scatter_token_id * kargs.stride_C;
c_scatter_valids[mIter][m0] = (scatter_token_id < kargs.NumTokens);
});
});