From 63afb25ab369f87a07782e84e532bf823f32f910 Mon Sep 17 00:00:00 2001 From: Qinghua Zhou Date: Thu, 23 Apr 2026 21:53:34 +0000 Subject: [PATCH] =?UTF-8?q?tests/ep:=20LL=20bench=20combine=20uses=20recv?= =?UTF-8?q?=5Ftokens=C3=97hidden=20for=20payload=20bytes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each local expert sends one copy per dispatched token back to its owner, so the bytes actually on the wire during combine match dispatch. The previous num_tokens×hidden under-counted by ~num_topk×, making combine BW look artificially low next to dispatch. --- test/python/ext/ep/test_low_latency_multirank.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/python/ext/ep/test_low_latency_multirank.py b/test/python/ext/ep/test_low_latency_multirank.py index 1e9f6ba5..d82ea309 100644 --- a/test/python/ext/ep/test_low_latency_multirank.py +++ b/test/python/ext/ep/test_low_latency_multirank.py @@ -256,10 +256,12 @@ def main(): comb_us = start_ev.elapsed_time(end_ev) * 1e3 / iters # Dispatch payload: recv_tokens × hidden × bf16 (received on this rank). - # Combine payload: num_tokens × hidden × bf16 (sent from each local expert - # back to the owning rank; one token's worth of bytes per reduction). + # Combine payload: recv_tokens × hidden × bf16 as well -- each local expert + # sends one copy per dispatched token back to its owner, so the bytes on + # the wire match dispatch. Using num_tokens × hidden here would under-count + # the actual send payload by ~num_topk×. disp_bytes = recv_tokens * hidden * 2 - comb_bytes = num_tokens * hidden * 2 + comb_bytes = recv_tokens * hidden * 2 disp_bw = disp_bytes / (disp_us * 1e-6) / 1e9 comb_bw = comb_bytes / (comb_us * 1e-6) / 1e9