diff --git a/src/llama-load-tensors.cpp b/src/llama-load-tensors.cpp
index 493aec56..227d94e6 100644
--- a/src/llama-load-tensors.cpp
+++ b/src/llama-load-tensors.cpp
@@ -388,7 +388,7 @@ void create_tensors_helper::create_std_ffn(int i, const LLM_TN & tn, llama_layer
 bool create_tensors_helper::create_llama_tensors(const LLM_TN & tn) {
     LOADING_PRELUDE
 
-    create_embd_output(tn, n_embd, n_vocab, true, true);
+    create_embd_output(tn, n_embd, n_vocab, true, false); // was: true
 
     for (int i = 0; i < n_layer; ++i) {
         ggml_context * ctx_layer = ctx_for_layer(i);
@@ -1843,7 +1843,7 @@ bool create_tensors_helper::create_glm4_moe_tensors(const LLM_TN & tn) {
     GGML_ASSERT(hparams.n_expert > 0 && "n_expert must be > 0 for GLM4_MOE MoE layers");
     GGML_ASSERT(hparams.n_expert_used > 0 && "n_expert_used must be > 0 for GLM4_MOE MoE layers");
 
-    create_embd_output(tn, n_embd, n_vocab, true, true);
+    create_embd_output(tn, n_embd, n_vocab, true, false); // was: true
 
     for (int i = 0; i < n_layer; ++i) {
         ggml_context * ctx_layer = ctx_for_layer(i);