mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-01-26 09:09:50 +00:00
Be more careful with having set the device before using a stream (#1093)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -3429,6 +3429,7 @@ GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend,
|
||||
|
||||
GGML_ASSERT(buf->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
|
||||
|
||||
ggml_cuda_set_device(cuda_ctx->device);
|
||||
CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, cuda_ctx->stream()));
|
||||
}
|
||||
|
||||
@@ -3530,6 +3531,7 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
|
||||
}
|
||||
|
||||
// record event on src stream after the copy
|
||||
ggml_cuda_set_device(cuda_ctx_src->device);
|
||||
if (!cuda_ctx_src->copy_event) {
|
||||
CUDA_CHECK(cudaEventCreateWithFlags(&cuda_ctx_src->copy_event, cudaEventDisableTiming));
|
||||
}
|
||||
@@ -3547,6 +3549,7 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
|
||||
}
|
||||
} else {
|
||||
// src and dst are on the same backend
|
||||
printf("Why is this being invoked?\n");
|
||||
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, cuda_ctx_src->stream()));
|
||||
}
|
||||
return true;
|
||||
|
||||
@@ -343,6 +343,7 @@ void ggml_cuda_op_reduce([[maybe_unused]] ggml_backend_cuda_context & ctx, ggml_
|
||||
auto elem_size = ggml_element_size(dst);
|
||||
for (int ii = 0; ii < nhave; ++ii) {
|
||||
int i = idx[ii];
|
||||
ggml_cuda_set_device(i);
|
||||
int this_nelem = std::min(nelem_per_device, nelem - ii*nelem_per_device);
|
||||
copy_task task;
|
||||
task.nptr = nhave;
|
||||
@@ -389,18 +390,20 @@ void ggml_cuda_op_reduce([[maybe_unused]] ggml_backend_cuda_context & ctx, ggml_
|
||||
//printf("Submitted kernels\n");
|
||||
for (int ii = 0; ii < nhave; ++ii) {
|
||||
int i = idx[ii];
|
||||
ggml_cuda_set_device(i);
|
||||
CUDA_CHECK(cudaEventRecord(info.all_ctx[i]->copy_event, info.all_ctx[i]->stream()));
|
||||
}
|
||||
//printf("Recorded events again\n");
|
||||
for (int ii = 0; ii < nhave; ++ii) {
|
||||
int i = idx[ii];
|
||||
ggml_cuda_set_device(i);
|
||||
for (int jj = 0; jj < nhave; ++jj) {
|
||||
if (jj == ii) continue;
|
||||
int j = idx[jj];
|
||||
CUDA_CHECK(cudaStreamWaitEvent(info.all_ctx[i]->stream(), info.all_ctx[j]->copy_event));
|
||||
}
|
||||
}
|
||||
//printf("All good so far\n");
|
||||
ggml_cuda_set_device(ctx.device);
|
||||
return;
|
||||
}
|
||||
auto required_size = nbytes*(nhave-1);
|
||||
|
||||
Reference in New Issue
Block a user