enable peer access (NVlink) (#1050)

* enable peer access for cuda * Remove redundant loop
2026-04-30 11:21:56 +00:00 · 2025-12-11 07:31:56 +00:00
parent 279b4c44fc
commit 37e41d22dc
1 changed files with 22 additions and 47 deletions
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -1434,59 +1434,30 @@ static void ggml_cuda_op_mul_mat_cublas(
    GGML_UNUSED(src1_padded_row_size);
 }
-#if 0
+static void ggml_cuda_set_peer_access(int main_device) {
-static void ggml_cuda_set_peer_access(const int n_tokens, int main_device) {
+    ggml_cuda_set_device(main_device);
    static bool peer_access_enabled = false;
-    const bool enable_peer_access = n_tokens <= GGML_CUDA_PEER_MAX_BATCH_SIZE;
+    for (int id_other = 0; id_other < ggml_backend_cuda_get_device_count(); ++id_other) {
        if (main_device == id_other) {
            continue;
        }
-    if (peer_access_enabled == enable_peer_access) {
+        int can_access_peer;
-        return;
+        CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, main_device, id_other));
-    }
+        if (can_access_peer) {
-
+//~ #ifdef NDEBUG
-#ifdef NDEBUG
+            GGML_CUDA_LOG_INFO(" =========================== %s: Enabling Peer Access between Devices %d->%d\n", __func__, main_device, id_other);
-    for (int id = 0; id < ggml_backend_cuda_get_device_count(); ++id) {
+//~ #endif //NDEBUG
-        ggml_cuda_set_device(id);
+            cudaError_t err = cudaDeviceEnablePeerAccess(id_other, 0);
-        CUDA_CHECK(cudaDeviceSynchronize());
+            if (err != cudaErrorPeerAccessAlreadyEnabled) {
-    }
+                CUDA_CHECK(err);
-
+            } else {
-    for (int id = 0; id < ggml_backend_cuda_get_device_count(); ++id) {
+                // reset the error
-        ggml_cuda_set_device(id);
+                (void)cudaGetLastError();
        for (int id_other = 0; id_other < ggml_backend_cuda_get_device_count(); ++id_other) {
            if (id == id_other) {
                continue;
            }
            if (id != main_device && id_other != main_device) {
                continue;
            }
            int can_access_peer;
            CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, id, id_other));
            if (can_access_peer) {
                if (enable_peer_access) {
                    cudaError_t err = cudaDeviceEnablePeerAccess(id_other, 0);
                    if (err != cudaErrorPeerAccessAlreadyEnabled) {
                        CUDA_CHECK(err);
                    }
                } else {
                    cudaError_t err = cudaDeviceDisablePeerAccess(id_other);
                    if (err != cudaErrorPeerAccessNotEnabled) {
                        CUDA_CHECK(err);
                    }
                }
            }
        }
    }
    ggml_cuda_set_device(main_device);
 #endif // NDEBUG
    peer_access_enabled = enable_peer_access;
    GGML_UNUSED(main_device);
 }
 #endif
 static cudaError_t ggml_cuda_Memcpy2DPeerAsync(
    void * dst, int dstDevice, size_t dpitch, void * src, int srcDevice, size_t spitch, size_t width, size_t height, cudaStream_t stream) {
@@ -4390,6 +4361,10 @@ GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device, [[maybe_unused]] con
 #endif
    }
 #if !defined(GGML_CUDA_NO_PEER_COPY)
    ggml_cuda_set_peer_access(device);
 #endif
    return cuda_backend;
 }