mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 19:31:48 +00:00
Do not allocate KV cache for unused layers
This commit is contained in:
@@ -532,7 +532,7 @@ static bool llama_kv_cache_init(
  const struct llama_hparams & hparams = model.hparams;
- const int64_t n_layer = hparams.n_layer;
+ const int64_t n_layer = hparams.n_layer - hparams.nextn_predict_layers;
  cache.has_shift = false;
Reference in New Issue
Block a user