tests/ep: HT benches also print per_rank_bw

Align the HT benchmarks with NCCL-EP's ep_bench, in the same way as the LL
test: report both per-rank (agg/num_ranks) and aggregate throughput.
This commit is contained in:
Qinghua Zhou
2026-04-23 22:58:23 +00:00
parent 10cd0012f1
commit 9840853c69
2 changed files with 12 additions and 4 deletions

View File

@@ -338,11 +338,15 @@ def main():
flush=True,
)
print(
f" dispatch: {disp_us_t.item():.1f}us (max) agg_bw={disp_bw_t.item():.2f} GB/s",
f" dispatch: {disp_us_t.item():.1f}us (max) "
f"per_rank_bw={disp_bw_t.item() / num_ranks:.2f} GB/s "
f"agg_bw={disp_bw_t.item():.2f} GB/s",
flush=True,
)
print(
f" combine : {comb_us_t.item():.1f}us (max) agg_bw={comb_bw_t.item():.2f} GB/s",
f" combine : {comb_us_t.item():.1f}us (max) "
f"per_rank_bw={comb_bw_t.item() / num_ranks:.2f} GB/s "
f"agg_bw={comb_bw_t.item():.2f} GB/s",
flush=True,
)

View File

@@ -285,11 +285,15 @@ def main():
flush=True,
)
print(
f" dispatch: {disp_us_t.item():.1f}us (max) agg_bw={disp_bw_t.item():.2f} GB/s",
f" dispatch: {disp_us_t.item():.1f}us (max) "
f"per_rank_bw={disp_bw_t.item() / num_ranks:.2f} GB/s "
f"agg_bw={disp_bw_t.item():.2f} GB/s",
flush=True,
)
print(
f" combine : {comb_us_t.item():.1f}us (max) agg_bw={comb_bw_t.item():.2f} GB/s",
f" combine : {comb_us_t.item():.1f}us (max) "
f"per_rank_bw={comb_bw_t.item() / num_ranks:.2f} GB/s "
f"agg_bw={comb_bw_t.item():.2f} GB/s",
flush=True,
)