mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-25 07:14:40 +00:00
test/ext/ep: make HT test Config env-driven
Allow tuning the internode HT test cfg from the environment without editing the source. Supported variables (all optional):

  MSCCLPP_EP_NSM        (default 152)  num channels / SMs
  MSCCLPP_EP_NVL_SEND   (default 8)
  MSCCLPP_EP_NVL_RECV   (default 256)
  MSCCLPP_EP_RDMA_SEND  (default 16)
  MSCCLPP_EP_RDMA_RECV  (default 128)

The defaults match what we use for 16-node GB200 bench runs (e.g. NVL_RECV=512 to satisfy the HT combine assert at 16 nodes).
This commit is contained in:
@@ -121,7 +121,7 @@ def main():
|
||||
|
||||
# Buffer config for internode HT: needs num_rdma_bytes > 0. Size buffers
|
||||
# using max(hidden, bench_hidden) so the optional bench phase fits.
|
||||
cfg = ep.Config(20, 8, 256, 16, 128)
|
||||
cfg = ep.Config(int(os.environ.get("MSCCLPP_EP_NSM","152")), int(os.environ.get("MSCCLPP_EP_NVL_SEND","8")), int(os.environ.get("MSCCLPP_EP_NVL_RECV","256")), int(os.environ.get("MSCCLPP_EP_RDMA_SEND","16")), int(os.environ.get("MSCCLPP_EP_RDMA_RECV","128")))
|
||||
_bench_on = os.environ.get("MSCCLPP_EP_BENCH", "0") == "1"
|
||||
_buf_hidden = max(hidden, int(os.environ.get("MSCCLPP_EP_BENCH_HIDDEN", "0"))) if _bench_on else hidden
|
||||
num_nvl_bytes = cfg.get_nvl_buffer_size_hint(_buf_hidden * x.element_size(), num_ranks)
|
||||
|
||||
Reference in New Issue
Block a user