From 9e07839ba3077c1e5eda99895a418525ae14cea8 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Wed, 21 Jan 2026 07:53:18 +0200
Subject: [PATCH] Correct GLM-4.7-Flash gating function (#1174)

* Correct GLM-4.7-Flash gating function

* This is better
---
 src/llama-hparams.cpp | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/llama-hparams.cpp b/src/llama-hparams.cpp
index 79e0a1d0..7b889c9c 100644
--- a/src/llama-hparams.cpp
+++ b/src/llama-hparams.cpp
@@ -27,6 +27,14 @@ const char * llama_hparams::rope_scaling_type_name(llama_rope_scaling_type type)
     return LLAMA_ROPE_SCALING_TYPES.at(type);
 }
 
+static inline const char * llm_expert_gating_func_name(llm_expert_gating_func_type type) {
+    switch (type) {
+        case LLM_EXPERT_GATING_FUNC_SOFTMAX: return "softmax";
+        case LLM_EXPERT_GATING_FUNC_SIGMOID: return "sigmoid";
+        case LLM_EXPERT_GATING_FUNC_TYPE_SOFTMAX_WEIGHT: return "weight";
+        default: return "none";
+    }
+}
 
 void llm_load_hparams(
@@ -778,11 +786,17 @@ void llm_load_hparams(
         ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
         ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
         ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false);
+        hparams.expert_gating_func = LLM_EXPERT_GATING_FUNC_TYPE_NONE;
         ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
-        if (hparams.expert_gating_func == 0) {
-            // for compatibility with existing DeepSeek V2 and V2.5 GGUFs
-            // that have no expert_gating_func model parameter set
-            hparams.expert_gating_func = LLM_EXPERT_GATING_FUNC_SOFTMAX;
+        if (hparams.expert_gating_func == LLM_EXPERT_GATING_FUNC_TYPE_NONE) {
+            // Some models don't have the expert gating function recorded in the GGUF.
+            // Hence, we make the LLM_KV_EXPERT_GATING_FUNC entry optional and set it here if missing.
+            // DeepSeek models normally use softmax as the gating function, but there is GLM-4.7-Flash now
+            // (identified via its number of layers being 47 or 48), which uses sigmoid.
+            hparams.expert_gating_func = hparams.n_layer == 47 || hparams.n_layer == 48 ?
+                LLM_EXPERT_GATING_FUNC_SIGMOID : LLM_EXPERT_GATING_FUNC_SOFTMAX;
+            printf("================= Missing experts gating function -> set to %s\n",
+                llm_expert_gating_func_name(llm_expert_gating_func_type(hparams.expert_gating_func)));
         }
         ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
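
Note (illustrative, not part of the patch): the fallback matters because softmax and sigmoid gating turn the same router logits into different expert weights, so defaulting to softmax for a sigmoid-gated model such as GLM-4.7-Flash changes the routing weights. The standalone C++ sketch below uses hypothetical helper names (gate_softmax, gate_sigmoid) that are not taken from llama-hparams.cpp; it only demonstrates the two functions.

// Minimal sketch of softmax vs. sigmoid expert gating over router logits.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Softmax gating: weights are coupled and sum to 1 across all experts.
static std::vector<float> gate_softmax(const std::vector<float> & logits) {
    const float max_l = *std::max_element(logits.begin(), logits.end());
    std::vector<float> w(logits.size());
    float sum = 0.0f;
    for (size_t i = 0; i < logits.size(); ++i) {
        w[i] = std::exp(logits[i] - max_l);
        sum += w[i];
    }
    for (float & x : w) x /= sum;
    return w;
}

// Sigmoid gating: each weight lies in (0, 1) independently of the other experts.
static std::vector<float> gate_sigmoid(const std::vector<float> & logits) {
    std::vector<float> w(logits.size());
    for (size_t i = 0; i < logits.size(); ++i) {
        w[i] = 1.0f / (1.0f + std::exp(-logits[i]));
    }
    return w;
}

int main() {
    const std::vector<float> logits = {2.0f, 0.5f, -1.0f};
    const auto sm = gate_softmax(logits);
    const auto sg = gate_sigmoid(logits);
    for (size_t i = 0; i < logits.size(); ++i) {
        std::printf("expert %zu: softmax %.3f  sigmoid %.3f\n", i, sm[i], sg[i]);
    }
    return 0;
}

For the sample logits above, softmax yields roughly 0.79 / 0.18 / 0.04 while sigmoid yields roughly 0.88 / 0.62 / 0.27, so the two choices of gating function feed clearly different weights into the MoE combination.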