mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-23 06:34:13 +00:00
Fix split mode graph when p2p is not enabled
This commit is contained in:
@@ -1479,9 +1479,10 @@ static void ggml_cuda_op_mul_mat_cublas(
|
||||
GGML_UNUSED(src1_padded_row_size);
|
||||
}
|
||||
|
||||
static void ggml_cuda_set_peer_access(int main_device) {
|
||||
static bool ggml_cuda_set_peer_access(int main_device) {
|
||||
ggml_cuda_set_device(main_device);
|
||||
|
||||
bool all_enabled = true;
|
||||
for (int id_other = 0; id_other < ggml_backend_cuda_get_device_count(); ++id_other) {
|
||||
if (main_device == id_other) {
|
||||
continue;
|
||||
@@ -1500,8 +1501,11 @@ static void ggml_cuda_set_peer_access(int main_device) {
|
||||
// reset the error
|
||||
(void)cudaGetLastError();
|
||||
}
|
||||
} else {
|
||||
all_enabled = false;
|
||||
}
|
||||
}
|
||||
return all_enabled;
|
||||
}
|
||||
|
||||
static cudaError_t ggml_cuda_Memcpy2DPeerAsync(
|
||||
@@ -4453,7 +4457,7 @@ GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device, [[maybe_unused]] con
|
||||
|
||||
#if !defined(GGML_CUDA_NO_PEER_COPY)
|
||||
if (enable_p2p) {
|
||||
ggml_cuda_set_peer_access(device);
|
||||
ctx->p2p_enabled = ggml_cuda_set_peer_access(device);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -843,6 +843,7 @@ struct ggml_backend_cuda_context {
|
||||
int device;
|
||||
std::string name;
|
||||
cudaEvent_t copy_event = nullptr;
|
||||
bool p2p_enabled = false;
|
||||
|
||||
cudaStream_t streams[GGML_CUDA_MAX_DEVICES][GGML_CUDA_MAX_STREAMS] = { { nullptr } };
|
||||
cublasHandle_t cublas_handles[GGML_CUDA_MAX_DEVICES] = {nullptr};
|
||||
|
||||
@@ -176,7 +176,7 @@ void ggml_cuda_op_reduce([[maybe_unused]] ggml_backend_cuda_context & ctx, ggml_
|
||||
GGML_ASSERT(ii == nhave);
|
||||
GGML_ASSERT(have_this_device);
|
||||
}
|
||||
if (nhave == 4 && dst->ne[1] <= 8) {
|
||||
if (nhave == 4 && dst->ne[1] <= 8 && ctx.p2p_enabled) {
|
||||
for (int ii = 0; ii < nhave; ++ii) {
|
||||
int i = idx[ii];
|
||||
GGML_ASSERT(dst->src[i]->type == dst->type);
|
||||
@@ -241,7 +241,7 @@ void ggml_cuda_op_reduce([[maybe_unused]] ggml_backend_cuda_context & ctx, ggml_
|
||||
ggml_cuda_set_device(ctx.device);
|
||||
return;
|
||||
}
|
||||
if (dst->ne[1] <= 8) {
|
||||
if (dst->ne[1] <= 8 && ctx.p2p_enabled) {
|
||||
for (int ii = 0; ii < nhave; ++ii) {
|
||||
int i = idx[ii];
|
||||
GGML_ASSERT(dst->src[i]->type == dst->type);
|
||||
|
||||
Reference in New Issue
Block a user