From b30752a94fd70bfefbe1d3d834813b468390e8b6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 3 May 2026 22:51:44 +0000 Subject: [PATCH] review: fix docstring, trailing comma, import placement, and filename mismatch Agent-Logs-Url: https://github.com/microsoft/mscclpp/sessions/f587a2e1-568f-4596-bb02-342c101dd539 Co-authored-by: Binyang2014 <9415966+Binyang2014@users.noreply.github.com> --- python/mscclpp/__main__.py | 2 +- python/mscclpp/default_algos/allreduce_multi_nodes.py | 11 +++++++---- src/ext/collectives/algorithm_collection_builder.cc | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/python/mscclpp/__main__.py b/python/mscclpp/__main__.py index e70d4a8c..3a8f739e 100644 --- a/python/mscclpp/__main__.py +++ b/python/mscclpp/__main__.py @@ -73,7 +73,7 @@ default_algo_configs = [ tags={"default": 1}, ), "additional_kwargs": {"thread_block_group_size": 8}, - } + }, ] diff --git a/python/mscclpp/default_algos/allreduce_multi_nodes.py b/python/mscclpp/default_algos/allreduce_multi_nodes.py index 4de92ec7..20227c5c 100644 --- a/python/mscclpp/default_algos/allreduce_multi_nodes.py +++ b/python/mscclpp/default_algos/allreduce_multi_nodes.py @@ -2,12 +2,13 @@ # Licensed under the MIT License. """ -Multi-node AllReduce implementation using packet-based communication. -This implements a hierarchical AllReduce: intra-node allreduce followed by -inter-node exchange and final intra-node allreduce. +Generalized multi-node AllReduce implementation using packet-based communication. +This implements a hierarchical AllReduce for N nodes: +1. Intra-node reduce-scatter (each GPU reduces its assigned chunk across the node) +2. Inter-node allreduce (exchange fully intra-reduced chunks across all nodes) +3. Intra-node broadcast (distribute the fully reduced chunks back to all GPUs in the node) """ -import argparse from mscclpp.language.utils import AlgoSpec from mscclpp.language.channel import * from mscclpp.language.rank import * @@ -208,6 +209,8 @@ def allreduce_multi_nodes(spec: AlgoSpec, thread_block_group_size: int) -> Colle if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser() parser.add_argument("--name", type=str, help="name of the program") parser.add_argument("--num_gpus", type=int, help="total number of gpus") diff --git a/src/ext/collectives/algorithm_collection_builder.cc b/src/ext/collectives/algorithm_collection_builder.cc index 1e6fc0ee..701e5be2 100644 --- a/src/ext/collectives/algorithm_collection_builder.cc +++ b/src/ext/collectives/algorithm_collection_builder.cc @@ -113,7 +113,7 @@ AlgorithmCollection AlgorithmCollectionBuilder::buildDefaultDslAlgorithms(int ra }; static const std::vector defaultAlgoConfigs = { {"allreduce_2nodes_1K_64K.json", "allreduce", 8, 16, {{"default", 1}}}, - {"allreduce_2nodes_64K_2M.json", "allreduce", 8, 16, {{"default", 1}}}, + {"allreduce_2nodes_128K_2M.json", "allreduce", 8, 16, {{"default", 1}}}, {"allreduce_4nodes_1K_8M.json", "allreduce", 8, 32, {{"default", 1}}}}; AlgorithmCollection collection;