mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-02 12:39:54 +00:00
Don't split the output tensor
This commit is contained in:
@@ -388,7 +388,7 @@ void create_tensors_helper::create_std_ffn(int i, const LLM_TN & tn, llama_layer
|
||||
|
||||
bool create_tensors_helper::create_llama_tensors(const LLM_TN & tn) {
|
||||
LOADING_PRELUDE
|
||||
- create_embd_output(tn, n_embd, n_vocab, true, true);
+ create_embd_output(tn, n_embd, n_vocab, true, false); //true);
|
||||
|
||||
for (int i = 0; i < n_layer; ++i) {
|
||||
ggml_context * ctx_layer = ctx_for_layer(i);
|
||||
@@ -1843,7 +1843,7 @@ bool create_tensors_helper::create_glm4_moe_tensors(const LLM_TN & tn) {
|
||||
GGML_ASSERT(hparams.n_expert > 0 && "n_expert must be > 0 for GLM4_MOE MoE layers");
|
||||
GGML_ASSERT(hparams.n_expert_used > 0 && "n_expert_used must be > 0 for GLM4_MOE MoE layers");
|
||||
|
||||
- create_embd_output(tn, n_embd, n_vocab, true, true);
+ create_embd_output(tn, n_embd, n_vocab, true, false); //true);
|
||||
|
||||
for (int i = 0; i < n_layer; ++i) {
|
||||
ggml_context * ctx_layer = ctx_for_layer(i);
|
||||
|
||||
Reference in New Issue
Block a user