Add handle cache for AMD platform (#698)

Introduce handle cache for AMD platform.
Avoid reaching the handle limit if we open too many IPC handles

For NVIDIA, we don't need this feature since NVIDIA counts the
handle references internally and reuses the same handle if it has
already been opened

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Binyang2014 <9415966+Binyang2014@users.noreply.github.com>
Co-authored-by: Changho Hwang <changhohwang@microsoft.com>
This commit is contained in:
Binyang Li
2025-12-22 10:39:12 +08:00
committed by GitHub
parent 8d998820a3
commit eda74a7f29
7 changed files with 133 additions and 69 deletions

27
python/test/conftest.py Normal file
View File

@@ -0,0 +1,27 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import mpi4py
import os
import sys
mpi4py.rc.initialize = False
mpi4py.rc.finalize = True
import cupy as cp
from mpi4py import MPI
def pytest_configure(config):
    """Prepare each MPI rank before pytest collects any tests.

    Ensures MPI is initialized, binds this rank to a CUDA device
    (round-robin over the GPUs on its node), and silences stdout/stderr
    on every rank except rank 0.
    """
    if not MPI.Is_initialized():
        MPI.Init()
    # Ranks that share a node form one shared-memory communicator;
    # its size tells us how many processes (hence GPUs) live on this node.
    node_comm = MPI.COMM_WORLD.Split_type(MPI.COMM_TYPE_SHARED, 0, MPI.INFO_NULL)
    gpus_per_node = node_comm.size
    node_comm.Free()
    world_rank = MPI.COMM_WORLD.rank
    cp.cuda.Device(world_rank % gpus_per_node).use()
    # Only rank 0 keeps its streams; suppressing the rest avoids bad-fd issues.
    if world_rank != 0:
        sys.stdout = open(os.devnull, "w")
        sys.stderr = open(os.devnull, "w")