diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
index 990afa70..05ddef80 100644
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -538,7 +538,16 @@ GGML_CALL static bool ggml_backend_cuda_buffer_cpy_tensor(ggml_backend_buffer_t
 #ifdef GGML_CUDA_NO_PEER_COPY
             return false;
 #else
-            CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, dst_ctx->device, src->data, src_ctx->device, ggml_nbytes(src), cudaStreamPerThread));
+            // skip the peer copy when the source device cannot directly
+            // access the destination device's memory
+            int can_access_peer;
+            CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, src_ctx->device, dst_ctx->device));
+            if (can_access_peer) {
+                CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, dst_ctx->device, src->data, src_ctx->device, ggml_nbytes(src), cudaStreamPerThread));
+            } else {
+                GGML_CUDA_LOG_WARN("%s: attempt to copy from device %d to device %d without peer access\n", __func__, src_ctx->device, dst_ctx->device);
+                return false;
+            }
 #endif
         }
         CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
@@ -3089,6 +3098,12 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
         CUDA_CHECK(cudaStreamWaitEvent(cuda_ctx_dst->stream(), cuda_ctx_src->copy_event, 0));
     } else {
         // src and dst are on the same backend
+        // the stream used below belongs to cuda_ctx_src->device, so make
+        // that device current before enqueuing the copy
+        if (cuda_ctx_src->device != cur_device) {
+            GGML_CUDA_LOG_WARN("%s: attempt to copy on device %d while current device is %d\n", __func__, cuda_ctx_src->device, cur_device);
+            CUDA_CHECK(cudaSetDevice(cuda_ctx_src->device));
+        }
         CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, cuda_ctx_src->stream()));
     }
     return true;
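
As a sanity check outside of ggml, here is a minimal standalone sketch of the guarded peer-copy pattern the first hunk introduces. The file name, CHECK macro, device indices, and buffer size are illustrative, not part of the patch; build with nvcc peer_copy_demo.cu.

// peer_copy_demo.cu -- standalone sketch of the cudaDeviceCanAccessPeer guard
#include <cstdio>
#include <cuda_runtime.h>

#define CHECK(call)                                                \
    do {                                                           \
        cudaError_t err_ = (call);                                 \
        if (err_ != cudaSuccess) {                                 \
            fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                     \
            return 1;                                              \
        }                                                          \
    } while (0)

int main() {
    int n_devices = 0;
    CHECK(cudaGetDeviceCount(&n_devices));
    if (n_devices < 2) {
        printf("need at least two devices\n");
        return 0;
    }

    const int src_dev = 0;   // illustrative device indices
    const int dst_dev = 1;
    const size_t nbytes = 1u << 20;

    void * src = nullptr;
    void * dst = nullptr;
    CHECK(cudaSetDevice(src_dev));
    CHECK(cudaMalloc(&src, nbytes));
    CHECK(cudaSetDevice(dst_dev));
    CHECK(cudaMalloc(&dst, nbytes));

    // same guard as the patch: query whether src_dev can directly access
    // dst_dev's memory before issuing the peer copy
    int can_access_peer = 0;
    CHECK(cudaDeviceCanAccessPeer(&can_access_peer, src_dev, dst_dev));
    if (can_access_peer) {
        CHECK(cudaMemcpyPeerAsync(dst, dst_dev, src, src_dev, nbytes, cudaStreamPerThread));
        CHECK(cudaStreamSynchronize(cudaStreamPerThread));
        printf("peer copy ok\n");
    } else {
        // mirrors the patch's fallback: warn and bail out instead of
        // issuing the copy
        fprintf(stderr, "no peer access from device %d to device %d\n", src_dev, dst_dev);
    }

    CHECK(cudaFree(dst));
    CHECK(cudaSetDevice(src_dev));
    CHECK(cudaFree(src));
    return 0;
}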
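
The second hunk is the general "make the stream's device current before enqueuing work" guard. Below is a sketch of the same pattern as a reusable helper; the name copy_on_device_stream and the restore-the-caller's-device behavior are my own additions, not something the patch does (the patch leaves the device switched).

// device_guard_demo.cu -- sketch of the current-device guard from the second hunk
#include <cstdio>
#include <cuda_runtime.h>

// hypothetical helper: enqueue a device-to-device copy on a stream that was
// created on stream_device, switching devices first if necessary
static cudaError_t copy_on_device_stream(void * dst, const void * src, size_t nbytes,
                                         cudaStream_t stream, int stream_device) {
    int cur_device = -1;
    cudaError_t err = cudaGetDevice(&cur_device);
    if (err != cudaSuccess) {
        return err;
    }

    // same check as the patch: the device that owns the stream should be
    // current when the copy is issued
    if (cur_device != stream_device) {
        fprintf(stderr, "copy on device %d while current device is %d\n",
                stream_device, cur_device);
        err = cudaSetDevice(stream_device);
        if (err != cudaSuccess) {
            return err;
        }
    }

    err = cudaMemcpyAsync(dst, src, nbytes, cudaMemcpyDeviceToDevice, stream);

    // unlike the patch, restore the caller's device so the guard is
    // side-effect free
    if (cur_device != stream_device) {
        cudaSetDevice(cur_device);
    }
    return err;
}

int main() {
    const size_t nbytes = 1u << 20;
    void * a = nullptr;
    void * b = nullptr;
    cudaStream_t stream;

    if (cudaSetDevice(0) != cudaSuccess) { return 1; }
    if (cudaMalloc(&a, nbytes) != cudaSuccess) { return 1; }
    if (cudaMalloc(&b, nbytes) != cudaSuccess) { return 1; }
    if (cudaStreamCreate(&stream) != cudaSuccess) { return 1; }

    // with a second GPU one could cudaSetDevice(1) here to trip the guard
    cudaError_t err = copy_on_device_stream(b, a, nbytes, stream, /*stream_device=*/0);
    printf("copy: %s\n", cudaGetErrorString(err));

    cudaStreamSynchronize(stream);
    cudaStreamDestroy(stream);
    cudaFree(a);
    cudaFree(b);
    return 0;
}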