mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-03-24 09:07:46 +00:00
Introduce a handle cache for the AMD platform to avoid hitting the handle limit when too many IPC handles are opened. NVIDIA does not need this feature, since it counts handle references internally and reuses the same handle if it has already been opened. --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Binyang2014 <9415966+Binyang2014@users.noreply.github.com> Co-authored-by: Changho Hwang <changhohwang@microsoft.com>
28 lines
752 B
Python
28 lines
752 B
Python
# Copyright (c) Microsoft Corporation.
|
|
# Licensed under the MIT License.
|
|
|
|
import mpi4py
|
|
import os
|
|
import sys
|
|
|
|
# Turn off mpi4py's automatic MPI initialization at import; this is set before
# `from mpi4py import MPI`, and pytest_configure calls MPI.Init() explicitly.
mpi4py.rc.initialize = False
|
|
# Leave mpi4py's automatic MPI finalization at exit enabled, so no explicit
# MPI.Finalize() call is made in this file.
mpi4py.rc.finalize = True
|
|
|
|
import cupy as cp
|
|
from mpi4py import MPI
|
|
|
|
|
|
def pytest_configure(config):
    """Pytest hook: start MPI and select this rank's GPU before test collection.

    MPI is initialized here if it has not been already. ``MPI.COMM_WORLD`` is
    then split by shared-memory domain, and the size of that sub-communicator
    is treated as the number of GPUs per node: the CUDA device made current
    is ``world rank % that size``. Finally, every rank other than 0 has
    ``sys.stdout`` and ``sys.stderr`` replaced by handles to ``os.devnull``.

    Args:
        config: Object passed in by pytest; not used here.
    """
    if not MPI.Is_initialized():
        MPI.Init()

    world = MPI.COMM_WORLD

    # The shared-memory communicator is only needed for its size, so it is
    # released immediately after that is read.
    node_comm = world.Split_type(MPI.COMM_TYPE_SHARED, 0, MPI.INFO_NULL)
    gpus_per_node = node_comm.size
    node_comm.Free()

    device_index = world.rank % gpus_per_node
    cp.cuda.Device(device_index).use()

    # Only rank 0 keeps its output streams; the other ranks are silenced to
    # avoid a bad file descriptor issue.
    if world.rank == 0:
        return
    for stream_name in ("stdout", "stderr"):
        setattr(sys, stream_name, open(os.devnull, "w"))
|