Fix graph parallel when ngl < n_layers (#1241)

* Fix graph parallel when ngl < n_layers

* Fix using ffn_norm

When using graph parallel with ngl < n_layers, the ffn_norm tensor
may have ended up being split, while the ffn tensors are on the CPU.
In that case we will get a crash because we attempt to use the not-split
buffer of ffn_norm, which is invalid. This commit fixes that.

* Cleanup
This commit is contained in:
Kawrakow
2026-02-06 11:48:24 +02:00
committed by GitHub
parent 4d86907b18
commit c5d74f66e2
2 changed files with 68 additions and 62 deletions

View File

@@ -744,8 +744,12 @@ ggml_tensor * llm_build_context::llm_build_ffn(
}
auto cur = input;
//if (input->op == GGML_OP_REDUCE) {
// if (input->src[lctx.model.main_gpu]) cur = input->src[lctx.model.main_gpu];
//}
if (ffn_norm) {
cur = llm_build_norm(ctx, cur, lctx.model.hparams, ffn_norm, NULL, is_norm ? LLM_NORM : LLM_NORM_RMS, cb, il);
auto the_ffn_norm = ffn_norm->extra ? ((ggml_split_tensor_t *)ffn_norm->extra)->splits[lctx.model.main_gpu] : ffn_norm;
cur = llm_build_norm(ctx, cur, lctx.model.hparams, the_ffn_norm, NULL, is_norm ? LLM_NORM : LLM_NORM_RMS, cb, il);
cb(input, "ffn_norm", il);
}
if (cur->type != GGML_TYPE_F32) {