This commit is contained in:
Kawrakow
2025-12-20 06:50:58 +00:00
parent 5604ec9eae
commit e28148d401
2 changed files with 6 additions and 3 deletions

View File

@@ -3471,6 +3471,7 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
needs_f16_f32_copy = true;
} else {
printf("%s on device %d -> %s on device %d\n", src->name, cuda_ctx_src->device, dst->name, cuda_ctx_dst->device);
ggml_cuda_set_device(cuda_ctx_src->device);
CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, cuda_ctx_dst->device, src->data, cuda_ctx_src->device, ggml_nbytes(dst), cuda_ctx_src->stream()));
}

View File

@@ -663,7 +663,8 @@ ggml_tensor * llm_build_context::llm_build_ffn(
GGML_ASSERT((!split_u && !iextra->splits[id]) || (split_u && iextra->splits[id]));
}
if (!split_u) continue;
auto cur = iextra ? iextra->splits[id] : input;
//auto cur = iextra ? iextra->splits[id] : input;
auto cur = input;
if (ffn_norm && ffn_norm->extra) {
auto norm = (ggml_split_tensor_t *)ffn_norm->extra;
GGML_ASSERT(norm->splits[id]);
@@ -671,7 +672,7 @@ ggml_tensor * llm_build_context::llm_build_ffn(
cb(cur, "ffn_inp_normed", il_cb);
}
if (cur->type != GGML_TYPE_F32) {
cur = ggml_cast(ctx, input, GGML_TYPE_F32);
cur = ggml_cast(ctx, cur, GGML_TYPE_F32);
}
cur = ggml_fused_up_gate(ctx, split_u, split_g, cur, unary_op);
cb(cur, "ffn_up_gate", il_cb);
@@ -9398,7 +9399,8 @@ ggml_tensor * llm_build_context::build_std_attention(ggml_cgraph * gf, ggml_tens
GGML_ASSERT((!split_wq && !iextra->splits[id]) || (split_wq && iextra->splits[id]));
}
if (!split_wq) continue;
auto cur = iextra ? iextra->splits[id] : input;
//auto cur = iextra ? iextra->splits[id] : input;
auto cur = input;
if (attn_norm) {
auto split_norm = attn_norm->splits[id];
cur = llm_build_norm(ctx0, cur, hparams, split_norm, NULL, LLM_NORM_RMS, cb, il);