From e2acf7f1c8a274a8ba71ae4b182bfb47010119ea Mon Sep 17 00:00:00 2001 From: Caio Rocha <164253795+caiomcbr@users.noreply.github.com> Date: Fri, 20 Feb 2026 16:04:12 -0800 Subject: [PATCH] Removing MPI Dependency (#743) --- .azure-pipelines/multi-nodes-test.yml | 2 +- .../templates/integration-test.yaml | 2 +- .azure-pipelines/templates/nccl-test.yaml | 2 +- .azure-pipelines/templates/ut-no-ib-env.yaml | 2 +- .azure-pipelines/templates/ut-npkit.yaml | 2 +- .azure-pipelines/templates/ut.yaml | 4 +- CMakeLists.txt | 2 +- python/mscclpp/_core/comm.py | 37 +++++++++++++------ 8 files changed, 34 insertions(+), 19 deletions(-) diff --git a/.azure-pipelines/multi-nodes-test.yml b/.azure-pipelines/multi-nodes-test.yml index 97a95c94..914c2317 100644 --- a/.azure-pipelines/multi-nodes-test.yml +++ b/.azure-pipelines/multi-nodes-test.yml @@ -44,7 +44,7 @@ jobs: targetType: 'inline' script: | mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON .. + cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON .. make -j workingDirectory: '$(System.DefaultWorkingDirectory)' diff --git a/.azure-pipelines/templates/integration-test.yaml b/.azure-pipelines/templates/integration-test.yaml index b9dac24b..99ed6d04 100644 --- a/.azure-pipelines/templates/integration-test.yaml +++ b/.azure-pipelines/templates/integration-test.yaml @@ -19,7 +19,7 @@ steps: targetType: inline script: | mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. + cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. make -j workingDirectory: '$(System.DefaultWorkingDirectory)' diff --git a/.azure-pipelines/templates/nccl-test.yaml b/.azure-pipelines/templates/nccl-test.yaml index 1ce37d20..56b75d3f 100644 --- a/.azure-pipelines/templates/nccl-test.yaml +++ b/.azure-pipelines/templates/nccl-test.yaml @@ -27,7 +27,7 @@ steps: targetType: 'inline' script: | mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON .. + cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON .. make -j workingDirectory: '$(System.DefaultWorkingDirectory)/mscclpp' diff --git a/.azure-pipelines/templates/ut-no-ib-env.yaml b/.azure-pipelines/templates/ut-no-ib-env.yaml index aa21c407..e6576f6d 100644 --- a/.azure-pipelines/templates/ut-no-ib-env.yaml +++ b/.azure-pipelines/templates/ut-no-ib-env.yaml @@ -16,7 +16,7 @@ steps: targetType: 'inline' script: | mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. + cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. make -j workingDirectory: '$(System.DefaultWorkingDirectory)' diff --git a/.azure-pipelines/templates/ut-npkit.yaml b/.azure-pipelines/templates/ut-npkit.yaml index 0ab733c9..5c35317e 100644 --- a/.azure-pipelines/templates/ut-npkit.yaml +++ b/.azure-pipelines/templates/ut-npkit.yaml @@ -63,7 +63,7 @@ steps: set -e; \ cd /root/mscclpp; \ mkdir -p build && cd build; \ - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} -DMSCCLPP_NPKIT_FLAGS=\"-DENABLE_NPKIT -DENABLE_NPKIT_EVENT_TIME_SYNC_CPU -DENABLE_NPKIT_EVENT_TIME_SYNC_GPU -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_EXIT -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_EXIT\" ..; \ + cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} -DMSCCLPP_NPKIT_FLAGS=\"-DENABLE_NPKIT -DENABLE_NPKIT_EVENT_TIME_SYNC_CPU -DENABLE_NPKIT_EVENT_TIME_SYNC_GPU -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_EXIT -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_EXIT\" ..; \ make -j"' kill $CHILD_PID workingDirectory: '$(System.DefaultWorkingDirectory)' diff --git a/.azure-pipelines/templates/ut.yaml b/.azure-pipelines/templates/ut.yaml index 82ff4aac..2086fd0a 100644 --- a/.azure-pipelines/templates/ut.yaml +++ b/.azure-pipelines/templates/ut.yaml @@ -20,9 +20,9 @@ steps: script: | mkdir build && cd build if [ "${{ parameters.platform }}" == "rocm" ]; then - CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. + CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. else - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. + cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. fi make -j workingDirectory: '$(System.DefaultWorkingDirectory)' diff --git a/CMakeLists.txt b/CMakeLists.txt index 6288dbb0..e524b9ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,7 +47,7 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) # Options option(MSCCLPP_ENABLE_TRACE "Enable tracing" OFF) -option(MSCCLPP_BUILD_TESTS "Build tests" ON) +option(MSCCLPP_BUILD_TESTS "Build tests" OFF) option(MSCCLPP_BUILD_PYTHON_BINDINGS "Build Python bindings" ON) option(MSCCLPP_BUILD_EXT_NCCL "Build NCCL interfaces" ON) option(MSCCLPP_BUILD_EXT_COLLECTIVES "Build collective algorithms" ON) diff --git a/python/mscclpp/_core/comm.py b/python/mscclpp/_core/comm.py index f0c5c219..e74a0e6f 100644 --- a/python/mscclpp/_core/comm.py +++ b/python/mscclpp/_core/comm.py @@ -19,8 +19,8 @@ from mscclpp._mscclpp import ( CppTransport, CppTransportFlags, ) -import mpi4py import numpy as np +import pickle from mscclpp.utils import is_torch_tensor @@ -29,20 +29,35 @@ __all__ = ["CommGroup"] class CommGroup: def __init__( - self, mpi_comm: mpi4py.MPI.Comm = None, interfaceIpPortTrio: str = "", rank: int = None, size: int = None + self, + mpi_comm: "mpi4py.MPI.Comm" = None, + torch_group: "dist.ProcessGroup" = None, + interfaceIpPortTrio: str = "", + rank: int = None, + size: int = None, ): - if interfaceIpPortTrio == "": - self.bootstrap = CppTcpBootstrap.create(mpi_comm.rank, mpi_comm.size) + if interfaceIpPortTrio == "" and (mpi_comm is not None or torch_group is not None): uniq_id = None - if mpi_comm.rank == 0: - # similar to NCCL's unique id + self.bootstrap = CppTcpBootstrap.create(rank, size) + if rank == 0: uniq_id = self.bootstrap.create_unique_id() - uniq_id_global = mpi_comm.bcast(uniq_id, 0) + if mpi_comm is not None: + import mpi4py + + uniq_id_global = mpi_comm.bcast(uniq_id, 0) + else: + import torch + import torch.distributed as dist + + if rank == 0: + uniq_id_global = uniq_id + pickled_data = pickle.dumps(uniq_id) + data_tensor = torch.frombuffer(bytearray(pickled_data), dtype=torch.uint8).clone() + else: + data_tensor = torch.zeros(256, dtype=torch.uint8) + dist.broadcast(data_tensor, src=0, group=torch_group) + uniq_id_global = pickle.loads(data_tensor.numpy().tobytes()) self.bootstrap.initialize(uniq_id_global) - elif mpi_comm: - # use this instead - self.bootstrap = CppTcpBootstrap.create(mpi_comm.rank, mpi_comm.size) - self.bootstrap.initialize(interfaceIpPortTrio) elif not interfaceIpPortTrio == "": assert rank >= 0 and size >= 1 self.bootstrap = CppTcpBootstrap.create(rank, size)