We of course need to pass the merged tensor to build_ffn.

2026-04-21 23:19:22 +00:00 · 2026-01-12 16:05:14 +00:00
parent aad40bcd2d
commit ab1ec19151
1 changed files with 2 additions and 2 deletions
--- a/src/llama-build-context.cpp
+++ b/src/llama-build-context.cpp
@@ -6936,7 +6936,7 @@ ggml_cgraph * llm_build_context::build_glm4_moe() {
                    n_expert, n_expert_used,
                    LLM_FFN_SILU, hparams.expert_weights_norm, true, hparams.expert_weights_scale,
                    (llm_expert_gating_func_type) hparams.expert_gating_func,
-                    LLM_FFN_SILU, cb, il, gf, true);
+                    LLM_FFN_SILU, cb, il, gf, true, model.layers[il].ffn_up_gate_exps);
        }

        // residual and context vector
@@ -8669,7 +8669,7 @@ ggml_cgraph* llm_build_context::build_minimaxm2() {
                LLM_FFN_SILU, true,
                false, 0,
                (llm_expert_gating_func_type)hparams.expert_gating_func,
-                cb, il, gf);
+                cb, il, gf, false, model.layers[il].ffn_up_gate_exps);
        cb(cur, "ffn_moe_out", il);

        cur = ggml_add(ctx0, cur, ffn_inp);