From 37e41d22dcf44aebb50be0505c3ec54b4ad58b44 Mon Sep 17 00:00:00 2001 From: abc-nix <135605456+abc-nix@users.noreply.github.com> Date: Thu, 11 Dec 2025 07:31:56 +0000 Subject: [PATCH] enable peer access (NVlink) (#1050) * enable peer access for cuda * Remove redundant loop --- ggml/src/ggml-cuda.cu | 69 ++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 47 deletions(-) diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu index cd0bf889..c0a59768 100644 --- a/ggml/src/ggml-cuda.cu +++ b/ggml/src/ggml-cuda.cu @@ -1434,59 +1434,30 @@ static void ggml_cuda_op_mul_mat_cublas( GGML_UNUSED(src1_padded_row_size); } -#if 0 -static void ggml_cuda_set_peer_access(const int n_tokens, int main_device) { - static bool peer_access_enabled = false; +static void ggml_cuda_set_peer_access(int main_device) { + ggml_cuda_set_device(main_device); - const bool enable_peer_access = n_tokens <= GGML_CUDA_PEER_MAX_BATCH_SIZE; + for (int id_other = 0; id_other < ggml_backend_cuda_get_device_count(); ++id_other) { + if (main_device == id_other) { + continue; + } - if (peer_access_enabled == enable_peer_access) { - return; - } - -#ifdef NDEBUG - for (int id = 0; id < ggml_backend_cuda_get_device_count(); ++id) { - ggml_cuda_set_device(id); - CUDA_CHECK(cudaDeviceSynchronize()); - } - - for (int id = 0; id < ggml_backend_cuda_get_device_count(); ++id) { - ggml_cuda_set_device(id); - - for (int id_other = 0; id_other < ggml_backend_cuda_get_device_count(); ++id_other) { - if (id == id_other) { - continue; - } - if (id != main_device && id_other != main_device) { - continue; - } - - int can_access_peer; - CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, id, id_other)); - if (can_access_peer) { - if (enable_peer_access) { - cudaError_t err = cudaDeviceEnablePeerAccess(id_other, 0); - if (err != cudaErrorPeerAccessAlreadyEnabled) { - CUDA_CHECK(err); - } - } else { - cudaError_t err = cudaDeviceDisablePeerAccess(id_other); - if (err != cudaErrorPeerAccessNotEnabled) { - CUDA_CHECK(err); - } - } + int can_access_peer; + CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, main_device, id_other)); + if (can_access_peer) { +//~ #ifdef NDEBUG + GGML_CUDA_LOG_INFO(" =========================== %s: Enabling Peer Access between Devices %d->%d\n", __func__, main_device, id_other); +//~ #endif //NDEBUG + cudaError_t err = cudaDeviceEnablePeerAccess(id_other, 0); + if (err != cudaErrorPeerAccessAlreadyEnabled) { + CUDA_CHECK(err); + } else { + // reset the error + (void)cudaGetLastError(); } } } - - ggml_cuda_set_device(main_device); -#endif // NDEBUG - - peer_access_enabled = enable_peer_access; - - GGML_UNUSED(main_device); } -#endif static cudaError_t ggml_cuda_Memcpy2DPeerAsync( void * dst, int dstDevice, size_t dpitch, void * src, int srcDevice, size_t spitch, size_t width, size_t height, cudaStream_t stream) { @@ -4390,6 +4361,10 @@ GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device, [[maybe_unused]] con #endif } +#if !defined(GGML_CUDA_NO_PEER_COPY) + ggml_cuda_set_peer_access(device); +#endif + return cuda_backend; }