[Qwen3.5] Fix mamba slice destination offset when prefill TP != decode TP and decode TP size > 1 (#20655)

Co-authored-by: Shangming Cai <csmthu@gmail.com>
2026-07-01 20:27:57 +00:00 · 2026-03-17 04:30:58 -07:00
parent 966ae87d02
commit cfead25bbf
1 changed files with 3 additions and 1 deletions
--- a/python/sglang/srt/disaggregation/mooncake/conn.py
+++ b/python/sglang/srt/disaggregation/mooncake/conn.py
@@ -726,7 +726,9 @@ class MooncakeKVManager(CommonKVManager):
                # Each prefill sends all its dims to the appropriate offset in decode
                src_dim_start = 0
                num_dims_to_send = src_dim
-                dst_dim_start = local_tp_rank_in_group * src_dim
+                writers_per_decode = self.attn_tp_size // dst_attn_tp_size
+                local_writer_idx = local_tp_rank_in_group % writers_per_decode
+                dst_dim_start = local_writer_idx * src_dim
            else:
                # 1 prefill rank sends to multiple decode ranks
                # Prefill sends a slice of its dims to each decode rank