From acb0bc63fc9c0fc640b10e14a7d50deffcd32e49 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Date: Mon, 20 Oct 2025 08:21:21 +0300
Subject: [PATCH] Do not apply experts weight scale if it is 1

---
 src/llama-build-context.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-build-context.cpp b/src/llama-build-context.cpp
index dd0b62be..6693e7c2 100644
--- a/src/llama-build-context.cpp
+++ b/src/llama-build-context.cpp
@@ -863,7 +863,7 @@ llm_expert_gating_func_type   gating_op,
 
         weights = ggml_reshape_3d(ctx, weights, 1, n_expert_used, n_tokens);
     }
-    if (scale_w) {
+    if (scale_w && std::abs(w_scale-1) > 1e-5f) {
         weights = ggml_scale(ctx, weights, w_scale);
         cb(weights, "ffn_moe_weights_scaled", il);
     }