Add handle cache for AMD platform (#698)

Introduce handle cache for AMD platform.
Avoid reaching the handle limit if we open too many IPC handles

For NVIDIA, we don't need this feature since NVIDIA counts the
handle references internally and reuses the same handle if it has
already been opened

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Binyang2014 <9415966+Binyang2014@users.noreply.github.com>
Co-authored-by: Changho Hwang <changhohwang@microsoft.com>
This commit is contained in:
Binyang Li
2025-12-22 10:39:12 +08:00
committed by GitHub
parent 8d998820a3
commit eda74a7f29
7 changed files with 133 additions and 69 deletions

27
python/test/conftest.py Normal file
View File

@@ -0,0 +1,27 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import mpi4py
import os
import sys
mpi4py.rc.initialize = False
mpi4py.rc.finalize = True
import cupy as cp
from mpi4py import MPI
def pytest_configure(config):
    """Prepare each MPI rank before pytest collects any tests.

    Ensures MPI is initialized, binds this rank to a CUDA device
    (round-robin over the GPUs on its node), and silences stdout/stderr
    on every rank except rank 0.
    """
    if not MPI.Is_initialized():
        MPI.Init()
    # Ranks that share a node form one shared-memory communicator;
    # its size tells us how many processes (hence GPUs) live on this node.
    node_comm = MPI.COMM_WORLD.Split_type(MPI.COMM_TYPE_SHARED, 0, MPI.INFO_NULL)
    gpus_per_node = node_comm.size
    node_comm.Free()
    world_rank = MPI.COMM_WORLD.rank
    cp.cuda.Device(world_rank % gpus_per_node).use()
    # Only rank 0 keeps its streams; suppressing the rest avoids bad-fd issues.
    if world_rank != 0:
        sys.stdout = open(os.devnull, "w")
        sys.stderr = open(os.devnull, "w")