mirror of
https://github.com/kvcache-ai/sglang.git
synced 2026-07-01 20:27:57 +00:00
[Qwen3.5] mamba slice fix (Prefill TP != Decode TP & decode TP size>1) (#20655)
Co-authored-by: Shangming Cai <csmthu@gmail.com>
This commit is contained in:
@@ -726,7 +726,9 @@ class MooncakeKVManager(CommonKVManager):
|
||||
# Each prefill sends all its dims to the appropriate offset in decode
|
||||
src_dim_start = 0
|
||||
num_dims_to_send = src_dim
|
||||
- dst_dim_start = local_tp_rank_in_group * src_dim
|
||||
+ writers_per_decode = self.attn_tp_size // dst_attn_tp_size
|
||||
+ local_writer_idx = local_tp_rank_in_group % writers_per_decode
|
||||
+ dst_dim_start = local_writer_idx * src_dim
|
||||
else:
|
||||
# 1 prefill rank sends to multiple decode ranks
|
||||
# Prefill sends a slice of its dims to each decode rank
|
||||
|
||||
Reference in New Issue
Block a user