diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
index 828ea4c2..ad641c85 100644
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -1479,9 +1479,10 @@ static void ggml_cuda_op_mul_mat_cublas(
     GGML_UNUSED(src1_padded_row_size);
 }
 
-static void ggml_cuda_set_peer_access(int main_device) {
+static bool ggml_cuda_set_peer_access(int main_device) {
     ggml_cuda_set_device(main_device);
 
+    bool all_enabled = true;
     for (int id_other = 0; id_other < ggml_backend_cuda_get_device_count(); ++id_other) {
         if (main_device == id_other) {
             continue;
@@ -1500,8 +1501,11 @@ static void ggml_cuda_set_peer_access(int main_device) {
                 // reset the error
                 (void)cudaGetLastError();
             }
+        } else {
+            all_enabled = false;
         }
     }
+    return all_enabled;
 }
 
 static cudaError_t ggml_cuda_Memcpy2DPeerAsync(
@@ -4453,7 +4457,7 @@ GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device, [[maybe_unused]] con
 
 #if !defined(GGML_CUDA_NO_PEER_COPY)
     if (enable_p2p) {
-        ggml_cuda_set_peer_access(device);
+        ctx->p2p_enabled = ggml_cuda_set_peer_access(device);
     }
 #endif
 
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index 27909321..c1034d8d 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -843,6 +843,7 @@ struct ggml_backend_cuda_context {
     int device;
     std::string name;
     cudaEvent_t copy_event = nullptr;
+    bool p2p_enabled = false;
 
     cudaStream_t streams[GGML_CUDA_MAX_DEVICES][GGML_CUDA_MAX_STREAMS] = { { nullptr } };
     cublasHandle_t cublas_handles[GGML_CUDA_MAX_DEVICES] = {nullptr};
diff --git a/ggml/src/ggml-cuda/reduce.cu b/ggml/src/ggml-cuda/reduce.cu
index cf8d59e3..7b63bd47 100644
--- a/ggml/src/ggml-cuda/reduce.cu
+++ b/ggml/src/ggml-cuda/reduce.cu
@@ -176,7 +176,7 @@ void ggml_cuda_op_reduce([[maybe_unused]] ggml_backend_cuda_context & ctx, ggml_
         GGML_ASSERT(ii == nhave);
         GGML_ASSERT(have_this_device);
     }
-    if (nhave == 4 && dst->ne[1] <= 8) {
+    if (nhave == 4 && dst->ne[1] <= 8 && ctx.p2p_enabled) {
         for (int ii = 0; ii < nhave; ++ii) {
             int i = idx[ii];
             GGML_ASSERT(dst->src[i]->type == dst->type);
@@ -241,7 +241,7 @@ void ggml_cuda_op_reduce([[maybe_unused]] ggml_backend_cuda_context & ctx, ggml_
         ggml_cuda_set_device(ctx.device);
         return;
     }
-    if (dst->ne[1] <= 8) {
+    if (dst->ne[1] <= 8 && ctx.p2p_enabled) {
         for (int ii = 0; ii < nhave; ++ii) {
             int i = idx[ii];
             GGML_ASSERT(dst->src[i]->type == dst->type);