diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu index e2019b5f..b6dc3502 100644 --- a/ggml/src/ggml-cuda.cu +++ b/ggml/src/ggml-cuda.cu @@ -3451,6 +3451,7 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_ needs_f16_f32_copy = true; } else { + ggml_cuda_set_device(cuda_ctx_src->device); CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, cuda_ctx_dst->device, src->data, cuda_ctx_src->device, ggml_nbytes(dst), cuda_ctx_src->stream())); } #endif @@ -3458,10 +3459,8 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_ // record event on src stream after the copy if (!cuda_ctx_src->copy_event) { - ggml_cuda_set_device(cuda_ctx_src->device); CUDA_CHECK(cudaEventCreateWithFlags(&cuda_ctx_src->copy_event, cudaEventDisableTiming)); } - CUDA_CHECK(cudaEventRecord(cuda_ctx_src->copy_event, cuda_ctx_src->stream())); // wait on dst stream for the copy to complete