Doesn't do much on the GPU either

2026-02-27 16:44:21 +00:00 · 2025-08-31 08:48:51 +03:00
parent cef57a6b13
commit ff4f403231
2 changed files with 77 additions and 14 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -11108,6 +11108,10 @@ struct llm_build_context {
                struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv, cur);
                cb(Vcur, "Vcur", il);

+                ggml_build_forward_expand(gf, Qcur);
+                ggml_build_forward_expand(gf, Kcur);
+                ggml_build_forward_expand(gf, Vcur);
+
 		Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head,    n_tokens);
                Qcur = llm_build_norm(ctx0, Qcur, hparams, model.layers[il].attn_q_norm, NULL, LLM_NORM_RMS, cb, il);
                cb(Qcur, "Qcur_normed", il);