review: fix docstring, trailing comma, import placement, and filename mismatch
Agent-Logs-Url: https://github.com/microsoft/mscclpp/sessions/f587a2e1-568f-4596-bb02-342c101dd539
Co-authored-by: Binyang2014 <9415966+Binyang2014@users.noreply.github.com>
committed by GitHub
parent 2a34a7ed11
commit b30752a94f
@@ -73,7 +73,7 @@ default_algo_configs = [
             tags={"default": 1},
         ),
         "additional_kwargs": {"thread_block_group_size": 8},
-    }
+    },
 ]
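For orientation, here is a hedged sketch of what a full default_algo_configs entry plausibly looks like, reconstructed from the context lines above. Only the tags and additional_kwargs lines appear in the diff; the "spec" key and the idea that tags is an AlgoSpec keyword argument are assumptions for illustration, not the repository's actual fields.

    # Hypothetical shape of one entry; the "spec" key and the AlgoSpec
    # constructor usage are assumptions inferred from the surrounding context.
    from mscclpp.language.utils import AlgoSpec

    default_algo_configs = [
        {
            "spec": AlgoSpec(
                tags={"default": 1},  # shown in the diff context
            ),
            "additional_kwargs": {"thread_block_group_size": 8},
        },
    ]

The trailing-comma fix named in the commit title is the closing brace of this entry: "}" becomes "}," so further entries can be appended without touching the previous line.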
@@ -2,12 +2,13 @@
 # Licensed under the MIT License.
 
 """
-Multi-node AllReduce implementation using packet-based communication.
-This implements a hierarchical AllReduce: intra-node allreduce followed by
-inter-node exchange and final intra-node allreduce.
+Generalized multi-node AllReduce implementation using packet-based communication.
+This implements a hierarchical AllReduce for N nodes:
+1. Intra-node reduce-scatter (each GPU reduces its assigned chunk across the node)
+2. Inter-node allreduce (exchange fully intra-reduced chunks across all nodes)
+3. Intra-node broadcast (distribute the fully reduced chunks back to all GPUs in the node)
 """
 
-import argparse
 from mscclpp.language.utils import AlgoSpec
 from mscclpp.language.channel import *
 from mscclpp.language.rank import *
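The rewritten docstring can be made concrete with a small pure-Python reference model of the three phases. This is an illustrative sketch only (the actual file emits mscclpp DSL operations over channels and ranks, not Python loops), and it assumes one chunk per intra-node rank.

    def hierarchical_allreduce(data):
        """data[n][g] is the input vector of GPU g on node n, split into
        gpus_per_node chunks (one chunk per intra-node rank)."""
        num_nodes = len(data)
        gpus_per_node = len(data[0])

        # Phase 1: intra-node reduce-scatter -- on each node, GPU c ends up
        # owning chunk c, reduced over all GPUs of that node.
        partial = [
            [sum(data[n][g][c] for g in range(gpus_per_node))
             for c in range(gpus_per_node)]
            for n in range(num_nodes)
        ]

        # Phase 2: inter-node allreduce -- the owners of chunk c on every node
        # exchange their node-local partials and sum them.
        total = [sum(partial[n][c] for n in range(num_nodes))
                 for c in range(gpus_per_node)]

        # Phase 3: intra-node broadcast -- every GPU receives the fully reduced
        # chunks from its node peers, so all ranks finish with the same vector.
        return [[list(total) for _ in range(gpus_per_node)]
                for _ in range(num_nodes)]

With 2 nodes of 2 GPUs each and data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], every rank finishes with [16, 20].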
@@ -208,6 +209,8 @@ def allreduce_multi_nodes(spec: AlgoSpec, thread_block_group_size: int) -> Colle
 
 
 if __name__ == "__main__":
+    import argparse
+
     parser = argparse.ArgumentParser()
     parser.add_argument("--name", type=str, help="name of the program")
     parser.add_argument("--num_gpus", type=int, help="total number of gpus")
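The hunk line counts confirm the import-placement fix: import argparse leaves the module top (hunk above, -1 line) and moves under the __main__ guard here (+2 lines), since it is only used there. A minimal sketch of how the block presumably continues, using only the standard argparse API shown in the diff; the wiring into allreduce_multi_nodes is not part of this commit and is left as a hypothetical comment.

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--name", type=str, help="name of the program")
    parser.add_argument("--num_gpus", type=int, help="total number of gpus")
    args = parser.parse_args()

    # Hypothetical wiring of the parsed flags into the generator; the actual
    # AlgoSpec construction from args.name / args.num_gpus is not visible here:
    # program = allreduce_multi_nodes(spec, thread_block_group_size)

An invocation would then look like python <script>.py --name allreduce_multi_nodes --num_gpus 16, with example values; the script filename itself is not shown here (the commit title notes a filename mismatch fix).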