Arghh, we need to increase the context size again

2026-03-12 23:10:01 +00:00 · 2026-01-12 14:59:15 +02:00
parent 9821ac7b9c
commit 74dc8aa99e
1 changed file with 4 additions and 1 deletion
--- a/src/llama-load-tensors.cpp
+++ b/src/llama-load-tensors.cpp
@@ -199,7 +199,10 @@ create_tensors_helper::create_tensors_helper(llama_model_loader & _ml, llama_mod
        buft_layer_count[model.buft_layer[i].buft_matrix]++;
    }

-    ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output
+    auto n_tensors = ml.n_tensors;
+    if (ml.merge_qkv) n_tensors += n_layer;
+    if (ml.merge_up_gate_exps) n_tensors += n_layer;
+    ctx_size = ggml_tensor_overhead()*(n_tensors + 1); // +1 for models where tok_embd is duplicated as output
    ctx_size += ggml_tensor_overhead()*n_layer*3;         // for moe merged tensors

    if (model.splits.size() > 1) {