mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-03 10:30:27 +00:00
Fix graph parallel when ngl < n_layers (#1241)
* Fix graph parallel when ngl < n_layers * Fix using ffn_norm: when using graph parallel with ngl < n_layers, the ffn_norm tensor may have ended up being split while the ffn tensors are on the CPU. In that case we will get a crash, because we attempt to use the not-split buffer of ffn_norm, which is invalid. This commit fixes that. * Cleanup
This commit is contained in:
@@ -744,8 +744,12 @@ ggml_tensor * llm_build_context::llm_build_ffn(
|
||||
}
|
||||
|
||||
auto cur = input;
|
||||
//if (input->op == GGML_OP_REDUCE) {
|
||||
// if (input->src[lctx.model.main_gpu]) cur = input->src[lctx.model.main_gpu];
|
||||
//}
|
||||
if (ffn_norm) {
|
||||
cur = llm_build_norm(ctx, cur, lctx.model.hparams, ffn_norm, NULL, is_norm ? LLM_NORM : LLM_NORM_RMS, cb, il);
|
||||
auto the_ffn_norm = ffn_norm->extra ? ((ggml_split_tensor_t *)ffn_norm->extra)->splits[lctx.model.main_gpu] : ffn_norm;
|
||||
cur = llm_build_norm(ctx, cur, lctx.model.hparams, the_ffn_norm, NULL, is_norm ? LLM_NORM : LLM_NORM_RMS, cb, il);
|
||||
cb(input, "ffn_norm", il);
|
||||
}
|
||||
if (cur->type != GGML_TYPE_F32) {
|
||||
|
||||
Reference in New Issue
Block a user