From 487411b67695de4b4c9f79363d59357baa0c4d52 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Wed, 21 Jan 2026 05:52:10 +0000
Subject: [PATCH] This is better

---
 src/llama-hparams.cpp | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/src/llama-hparams.cpp b/src/llama-hparams.cpp
index f2b2d0d4..7b889c9c 100644
--- a/src/llama-hparams.cpp
+++ b/src/llama-hparams.cpp
@@ -786,21 +786,17 @@ void llm_load_hparams(
             ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
             ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
             ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false);
+            hparams.expert_gating_func = LLM_EXPERT_GATING_FUNC_TYPE_NONE;
             ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
-            if (hparams.n_layer == 47 || hparams.n_layer == 48) {
-                if (hparams.expert_gating_func != LLM_EXPERT_GATING_FUNC_SIGMOID) {
-                    printf("============== Corrected experts gating function from %s to %s\n",
-                            llm_expert_gating_func_name(llm_expert_gating_func_type(hparams.expert_gating_func)),
-                            llm_expert_gating_func_name(LLM_EXPERT_GATING_FUNC_SIGMOID));
-                    hparams.expert_gating_func = LLM_EXPERT_GATING_FUNC_SIGMOID;
-                }
-            } else {
-                if (hparams.expert_gating_func == LLM_EXPERT_GATING_FUNC_TYPE_NONE) {
-                    // for compatibility with existing DeepSeek V2 and V2.5 GGUFs
-                    // that have no expert_gating_func model parameter set
-                    printf("============== Missing experts gating function -> set to SOFTMAX\n");
-                    hparams.expert_gating_func = LLM_EXPERT_GATING_FUNC_SOFTMAX;
-                }
+            if (hparams.expert_gating_func == LLM_EXPERT_GATING_FUNC_TYPE_NONE) {
+                // Some models don't have the experts gating function recorded in the GGUF.
+                // Hence, the LLM_KV_EXPERT_GATING_FUNC entry is optional and we set it here if missing.
+                // DeepSeek models normally use softmax as the gating function, but there is now GLM-4.7-Flash
+                // (identified via the number of layers being 47 or 48), which uses sigmoid.
+                hparams.expert_gating_func = hparams.n_layer == 47 || hparams.n_layer == 48 ?
+                    LLM_EXPERT_GATING_FUNC_SIGMOID : LLM_EXPERT_GATING_FUNC_SOFTMAX;
+                printf("================= Missing experts gating function -> set to %s\n",
+                        llm_expert_gating_func_name(llm_expert_gating_func_type(hparams.expert_gating_func)));
             }
             ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
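
For reference, here is a minimal standalone sketch of the fallback heuristic the patch introduces. Only the layer-count rule (47 or 48 layers -> sigmoid, otherwise softmax) is taken from the diff above; the enum values, the helper default_expert_gating_func, and the main() driver are illustrative assumptions and not the actual llama-hparams.cpp code.

    // Standalone illustration only: enum values and the helper are assumed,
    // not the real loader API; only the 47/48-layer heuristic mirrors the patch.
    #include <cstdint>
    #include <cstdio>

    enum llm_expert_gating_func_type : uint32_t {
        LLM_EXPERT_GATING_FUNC_TYPE_NONE = 0, // value assumed for this sketch
        LLM_EXPERT_GATING_FUNC_SOFTMAX   = 1, // value assumed for this sketch
        LLM_EXPERT_GATING_FUNC_SIGMOID   = 2, // value assumed for this sketch
    };

    // Fallback when the GGUF does not record an expert gating function:
    // GLM-4.7-Flash is recognized by its 47 or 48 layers and gets sigmoid,
    // everything else (e.g. DeepSeek V2/V2.5) gets softmax.
    static llm_expert_gating_func_type default_expert_gating_func(uint32_t n_layer) {
        return (n_layer == 47 || n_layer == 48) ? LLM_EXPERT_GATING_FUNC_SIGMOID
                                                : LLM_EXPERT_GATING_FUNC_SOFTMAX;
    }

    int main() {
        std::printf("47 layers -> %d (sigmoid)\n", (int) default_expert_gating_func(47));
        std::printf("60 layers -> %d (softmax)\n", (int) default_expert_gating_func(60));
        return 0;
    }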