[Qwen3.5] mamba slice fix (Prefill TP != Decode TP & decode TP size>1) (#20655)

Co-authored-by: Shangming Cai <csmthu@gmail.com>
This commit is contained in:
YAMY
2026-03-17 04:30:58 -07:00
committed by GitHub
parent 966ae87d02
commit cfead25bbf

View File

@@ -726,7 +726,9 @@ class MooncakeKVManager(CommonKVManager):
# Each prefill rank sends all of its dims to the appropriate offset in the decode rank
src_dim_start = 0
num_dims_to_send = src_dim
- dst_dim_start = local_tp_rank_in_group * src_dim
+ writers_per_decode = self.attn_tp_size // dst_attn_tp_size
+ local_writer_idx = local_tp_rank_in_group % writers_per_decode
+ dst_dim_start = local_writer_idx * src_dim
else:
# 1 prefill rank sends to multiple decode ranks
# Prefill sends a slice of its dims to each decode rank