Apply f_logit_scale before the matrix multiplication (mul_mat) with the output tensor

This commit is contained in:
Iwan Kawrakow
2025-12-12 15:45:56 +00:00
parent 3100f03770
commit 117eaf9c9e

View File

@@ -7263,14 +7263,13 @@ ggml_cgraph * llm_build_context::build_cohere2() {
cur = llm_build_norm(ctx0, cur, hparams, model.output_norm, NULL, LLM_NORM, cb, -1);
cb(cur, "result_norm", -1);
// lm_head
cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
cb(cur, "output", -1);
if (f_logit_scale) {
cur = ggml_scale(ctx0, cur, f_logit_scale);
cb(cur, "result_norm_scaled", -1);
}
// lm_head
cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
cb(cur, "result_output", -1);
ggml_build_forward_expand(gf, cur);