mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-01-26 17:20:01 +00:00
cuda: set device to src device before p2p copy (#1073)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -3451,6 +3451,7 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
|
||||
needs_f16_f32_copy = true;
|
||||
|
||||
} else {
|
||||
ggml_cuda_set_device(cuda_ctx_src->device);
|
||||
CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, cuda_ctx_dst->device, src->data, cuda_ctx_src->device, ggml_nbytes(dst), cuda_ctx_src->stream()));
|
||||
}
|
||||
#endif
|
||||
@@ -3458,10 +3459,8 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
|
||||
|
||||
// record event on src stream after the copy
|
||||
if (!cuda_ctx_src->copy_event) {
|
||||
ggml_cuda_set_device(cuda_ctx_src->device);
|
||||
CUDA_CHECK(cudaEventCreateWithFlags(&cuda_ctx_src->copy_event, cudaEventDisableTiming));
|
||||
}
|
||||
|
||||
CUDA_CHECK(cudaEventRecord(cuda_ctx_src->copy_event, cuda_ctx_src->stream()));
|
||||
|
||||
// wait on dst stream for the copy to complete
|
||||
|
||||
Reference in New Issue
Block a user