From 047a5197719e7d21976a28445bd3004abd71c10d Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Wed, 19 Nov 2025 13:43:08 +0100
Subject: [PATCH] Make sure we can fuse Q and K RoPE for DeepSeek models (#985)

Co-authored-by: Iwan Kawrakow
---
 src/llama-build-context.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/llama-build-context.cpp b/src/llama-build-context.cpp
index 658b05bd..a3391a4c 100644
--- a/src/llama-build-context.cpp
+++ b/src/llama-build-context.cpp
@@ -6054,6 +6054,8 @@ ggml_cgraph * llm_build_context::build_deepseek2() {
         cb(k_rope, "k_rope", il);
         cb(kv_compressed, "kv_compressed", il);
 
+        ggml_build_forward_expand(gf, q_rope);
+        ggml_build_forward_expand(gf, k_rope);
         if (rope_cache) {
             q_rope = ggml_rope_fast(ctx0, q_rope, rope_cache);
             k_rope = ggml_rope_fast(ctx0, k_rope, rope_cache);
@@ -6066,6 +6068,8 @@ ggml_cgraph * llm_build_context::build_deepseek2() {
         }
         cb(q_rope, "q_rope", il);
         cb(k_rope, "k_rope", il);
+        ggml_build_forward_expand(gf, q_rope);
+        ggml_build_forward_expand(gf, k_rope);
 
         kv_compressed = llm_build_norm(ctx0, kv_compressed, hparams, model.layers[il].attn_kv_a_norm, NULL, LLM_NORM_RMS, cb, il);
         cb(kv_compressed, "kv_compressed", il);
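-- 
Reviewer note (not part of the patch): ggml_build_forward_expand() appends a
tensor and all of its not-yet-visited dependencies to the compute graph in
dependency order. By expanding q_rope and k_rope before (and again after)
the RoPE is applied, the patch forces their producing ops into the graph up
front, so the two RoPE ops have no unvisited dependencies left between them
and end up as adjacent nodes, which appears to be the precondition for the
backend to fuse them into a single kernel. Without the explicit expands,
graph construction order may interleave other nodes between the two RoPE ops.

Below is a minimal, self-contained sketch of the adjacency mechanism using
the public ggml C API. The tensor shapes, memory size, and RoPE mode are
illustrative assumptions, not values taken from this patch, and the example
only builds the graph; it does not run a backend or perform the fusion:

    #include "ggml.h"

    int main(void) {
        // small scratch context; no_alloc = false so tensor data lives here too
        struct ggml_init_params params = { 16*1024*1024, NULL, false };
        struct ggml_context * ctx = ggml_init(params);

        // hypothetical shapes: head_dim = 64, n_head = 8 (Q) / 1 (K), n_tokens = 4
        struct ggml_tensor * q   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 8, 4);
        struct ggml_tensor * k   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 1, 4);
        struct ggml_tensor * pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);

        // NEOX-style RoPE as an assumption (DeepSeek uses NEOX-style rotation);
        // mode constant from ggml.h
        struct ggml_tensor * q_rope = ggml_rope(ctx, q, pos, 64, GGML_ROPE_TYPE_NEOX);
        struct ggml_tensor * k_rope = ggml_rope(ctx, k, pos, 64, GGML_ROPE_TYPE_NEOX);

        struct ggml_cgraph * gf = ggml_new_graph(ctx);

        // the pattern from the patch: expand Q then K immediately, so the two
        // ROPE nodes land consecutively in gf and remain fusable by a backend
        // that scans for adjacent Q/K RoPE ops
        ggml_build_forward_expand(gf, q_rope);
        ggml_build_forward_expand(gf, k_rope);

        ggml_graph_print(gf); // the node list should show the two ROPE ops adjacent

        ggml_free(ctx);
        return 0;
    }

Expanding q_rope first visits q and pos and emits the Q ROPE node; expanding
k_rope then only needs to visit k before emitting the K ROPE node, so the two
ROPE ops sit back to back in the node list.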