From 5a633bb0e95a3e7148ea4abbdf7d2ce585f113b4 Mon Sep 17 00:00:00 2001
From: Downtown-Case
Date: Wed, 1 Oct 2025 13:37:31 -0500
Subject: [PATCH] Mark some multi-prediction tensors as not required. (#814)

---
 src/llama.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index e6a736b4..79dfdcc0 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6741,7 +6741,7 @@ static bool llm_load_tensors(
                     layer.nextn.embed_tokens = create_tensor(ctx_for_layer(final_layer),
                             tn(LLM_TENSOR_NEXTN_EMBED_TOKENS, "weight", final_layer),
                             { n_embd, n_vocab },
-                            flags);
+                            flags | TENSOR_NOT_REQUIRED);
                     // ENORM, HNORM: [embd]
                     layer.nextn.enorm = create_tensor(ctx_for_layer(final_layer),
                             tn(LLM_TENSOR_NEXTN_ENORM, "weight", final_layer),
@@ -6755,12 +6755,12 @@ static bool llm_load_tensors(
                     layer.nextn.shared_head_head = create_tensor(ctx_for_layer(final_layer),
                             tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "weight", final_layer),
                             { n_embd, n_vocab },
-                            flags);
+                            flags | TENSOR_NOT_REQUIRED);
                     // SHARED_HEAD_NORM: [embd]
                     layer.nextn.shared_head_norm = create_tensor(ctx_for_layer(final_layer),
                             tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "weight", final_layer),
                             { n_embd },
-                            flags);
+                            flags | TENSOR_NOT_REQUIRED);
                 }
             }
         }