Do not allocate KV cache for unused layers (hparams.nextn_predict_layers)

2026-03-06 12:00:29 +00:00 · 2025-10-20 08:35:25 +03:00
parent 1789de5994
commit 599c812f12
1 changed files with 1 additions and 1 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -532,7 +532,7 @@ static bool llama_kv_cache_init(

    const struct llama_hparams & hparams = model.hparams;

-    const int64_t  n_layer = hparams.n_layer;
+    const int64_t  n_layer = hparams.n_layer - hparams.nextn_predict_layers;

    cache.has_shift = false;