diff --git a/python/sglang/srt/disaggregation/mooncake/conn.py b/python/sglang/srt/disaggregation/mooncake/conn.py index 4f89f7097..15e815e69 100644 --- a/python/sglang/srt/disaggregation/mooncake/conn.py +++ b/python/sglang/srt/disaggregation/mooncake/conn.py @@ -726,7 +726,9 @@ class MooncakeKVManager(CommonKVManager): # Each prefill sends all its dims to the appropriate offset in decode src_dim_start = 0 num_dims_to_send = src_dim - dst_dim_start = local_tp_rank_in_group * src_dim + writers_per_decode = self.attn_tp_size // dst_attn_tp_size + local_writer_idx = local_tp_rank_in_group % writers_per_decode + dst_dim_start = local_writer_idx * src_dim else: # 1 prefill rank sends to multiple decode ranks # Prefill sends a slice of its dims to each decode rank