diff --git a/python/mscclpp/__main__.py b/python/mscclpp/__main__.py index 3a8f739e..70e2fad8 100644 --- a/python/mscclpp/__main__.py +++ b/python/mscclpp/__main__.py @@ -74,6 +74,27 @@ default_algo_configs = [ ), "additional_kwargs": {"thread_block_group_size": 8}, }, + { + "filename": "allreduce_8nodes_1K_8M.json", + "function": def_algo.allreduce_multi_nodes, + "spec": AlgoSpec( + name="allreduce_8nodes_1K_8M", + collective=AllReduce(64, 1, True), + nranks_per_node=8, + world_size=64, + in_place=True, + instances=1, + protocol="LL", + auto_sync=False, + num_threads_per_block=1024, + reuse_resources=True, + use_double_scratch_buffer=True, + min_message_size=1 << 10, + max_message_size=8 << 20, + tags={"default": 1}, + ), + "additional_kwargs": {"thread_block_group_size": 1}, + }, ] diff --git a/src/ext/collectives/algorithm_collection_builder.cc b/src/ext/collectives/algorithm_collection_builder.cc index 701e5be2..b7729a79 100644 --- a/src/ext/collectives/algorithm_collection_builder.cc +++ b/src/ext/collectives/algorithm_collection_builder.cc @@ -114,7 +114,8 @@ AlgorithmCollection AlgorithmCollectionBuilder::buildDefaultDslAlgorithms(int ra static const std::vector defaultAlgoConfigs = { {"allreduce_2nodes_1K_64K.json", "allreduce", 8, 16, {{"default", 1}}}, {"allreduce_2nodes_128K_2M.json", "allreduce", 8, 16, {{"default", 1}}}, - {"allreduce_4nodes_1K_8M.json", "allreduce", 8, 32, {{"default", 1}}}}; + {"allreduce_4nodes_1K_8M.json", "allreduce", 8, 32, {{"default", 1}}}, + {"allreduce_8nodes_1K_8M.json", "allreduce", 8, 64, {{"default", 1}}}}; AlgorithmCollection collection; static auto generateFileId = [](const std::string& input) {