Doesn't do much on the GPU either

This commit is contained in:
Iwan Kawrakow
2025-08-31 08:48:51 +03:00
parent cef57a6b13
commit ff4f403231
2 changed files with 77 additions and 14 deletions

View File

@@ -11108,6 +11108,10 @@ struct llm_build_context {
struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv, cur);
cb(Vcur, "Vcur", il);
ggml_build_forward_expand(gf, Qcur);
ggml_build_forward_expand(gf, Kcur);
ggml_build_forward_expand(gf, Vcur);
Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
Qcur = llm_build_norm(ctx0, Qcur, hparams, model.layers[il].attn_q_norm, NULL, LLM_NORM_RMS, cb, il);
cb(Qcur, "Qcur_normed", il);