mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-24 15:14:10 +00:00
Another peer to peer copy check
This commit is contained in:
@@ -538,7 +538,15 @@ GGML_CALL static bool ggml_backend_cuda_buffer_cpy_tensor(ggml_backend_buffer_t
|
||||
#ifdef GGML_CUDA_NO_PEER_COPY
|
||||
return false;
|
||||
#else
|
||||
CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, dst_ctx->device, src->data, src_ctx->device, ggml_nbytes(src), cudaStreamPerThread));
|
||||
int can_access_peer;
|
||||
CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, src_ctx->device, dst_ctx->device));
|
||||
if (can_access_peer) {
|
||||
CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, dst_ctx->device, src->data, src_ctx->device, ggml_nbytes(src), cudaStreamPerThread));
|
||||
} else {
|
||||
GGML_CUDA_LOG_WARN("%s: attempt to copy from device %d to device %d without access enabled\n", __func__, src_ctx->device, dst_ctx->device);
|
||||
return false;
|
||||
}
|
||||
//CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, dst_ctx->device, src->data, src_ctx->device, ggml_nbytes(src), cudaStreamPerThread));
|
||||
#endif
|
||||
}
|
||||
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
||||
@@ -3089,6 +3097,10 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
|
||||
CUDA_CHECK(cudaStreamWaitEvent(cuda_ctx_dst->stream(), cuda_ctx_src->copy_event, 0));
|
||||
} else {
|
||||
// src and dst are on the same backend
|
||||
if (cuda_ctx_src->device != cur_device) {
|
||||
GGML_CUDA_LOG_WARN("%s: attempt to copy on device %d while current device is %d\n", __func__, cuda_ctx_src->device, cur_device);
|
||||
CUDA_CHECK(cudaSetDevice(cuda_ctx_src->device));
|
||||
}
|
||||
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, cuda_ctx_src->stream()));
|
||||
}
|
||||
return true;
|
||||
|
||||
Reference in New Issue
Block a user