From 74dc8aa99eb18b6c5ca02ad79ec210819ea1500c Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Mon, 12 Jan 2026 14:59:15 +0200
Subject: [PATCH] Arghh, we need to increase the context size again

---
 src/llama-load-tensors.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/llama-load-tensors.cpp b/src/llama-load-tensors.cpp
index c7fc9050..fcddc183 100644
--- a/src/llama-load-tensors.cpp
+++ b/src/llama-load-tensors.cpp
@@ -199,7 +199,10 @@ create_tensors_helper::create_tensors_helper(llama_model_loader & _ml, llama_mod
         buft_layer_count[model.buft_layer[i].buft_matrix]++;
     }

-    ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output
+    auto n_tensors = ml.n_tensors;
+    if (ml.merge_qkv) n_tensors += n_layer;
+    if (ml.merge_up_gate_exps) n_tensors += n_layer;
+    ctx_size = ggml_tensor_overhead()*(n_tensors + 1); // +1 for models where tok_embd is duplicated as output
     ctx_size += ggml_tensor_overhead()*n_layer*3; // for moe merged tensors

     if (model.splits.size() > 1) {
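Note (not part of the patch): a minimal sketch of why the extra overhead matters, assuming the merge_qkv / merge_up_gate_exps flags seen in the diff; make_meta_ctx is a hypothetical helper, not code from this PR. A ggml context created with no_alloc = true stores only tensor metadata, so its mem_size must reserve ggml_tensor_overhead() bytes for every tensor that will be created in it. When loading fuses tensors (one merged QKV tensor and/or one merged up+gate tensor per layer), those extra tensors must be counted up front, or later tensor creation runs out of context memory.

    // Sketch only, under the assumptions stated above.
    #include "ggml.h"

    static struct ggml_context * make_meta_ctx(size_t n_tensors, size_t n_layer,
                                               bool merge_qkv, bool merge_up_gate_exps) {
        if (merge_qkv)          n_tensors += n_layer; // one fused QKV tensor per layer
        if (merge_up_gate_exps) n_tensors += n_layer; // one fused up+gate tensor per layer

        struct ggml_init_params params = {
            /*.mem_size   =*/ ggml_tensor_overhead()*(n_tensors + 1), // +1: tok_embd duplicated as output
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ true, // metadata only; weight data lives in backend buffers
        };
        // If mem_size is underestimated, subsequent ggml_new_tensor() calls
        // abort with "not enough space in the context's memory pool".
        return ggml_init(params);
    }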