From cfead25bbf695b577fcb8ef6d5cb4ab63e2901f6 Mon Sep 17 00:00:00 2001
From: YAMY <74099316+YAMY1234@users.noreply.github.com>
Date: Tue, 17 Mar 2026 04:30:58 -0700
Subject: [PATCH] [Qwen3.5] mamba slice fix (Prefill TP != Decode TP & decode TP size>1) (#20655)

Co-authored-by: Shangming Cai
---
 python/sglang/srt/disaggregation/mooncake/conn.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/disaggregation/mooncake/conn.py b/python/sglang/srt/disaggregation/mooncake/conn.py
index 4f89f7097..15e815e69 100644
--- a/python/sglang/srt/disaggregation/mooncake/conn.py
+++ b/python/sglang/srt/disaggregation/mooncake/conn.py
@@ -726,7 +726,9 @@ class MooncakeKVManager(CommonKVManager):
                 # Each prefill sends all its dims to the appropriate offset in decode
                 src_dim_start = 0
                 num_dims_to_send = src_dim
-                dst_dim_start = local_tp_rank_in_group * src_dim
+                writers_per_decode = self.attn_tp_size // dst_attn_tp_size
+                local_writer_idx = local_tp_rank_in_group % writers_per_decode
+                dst_dim_start = local_writer_idx * src_dim
             else:
                 # 1 prefill rank sends to multiple decode ranks
                 # Prefill sends a slice of its dims to each decode rank