gpt-oss: duplicate expert biases when necessary (#829)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2026-04-29 10:51:51 +00:00 · 2025-10-14 14:38:40 +03:00
parent 2b71974af9
commit ba9fefb73d
4 changed files with 47 additions and 18 deletions
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -178,6 +178,9 @@ struct llama_layer {
    struct ggml_tensor * ffn_gate_exps_b = nullptr;
    struct ggml_tensor * ffn_down_exps_b = nullptr;
    struct ggml_tensor * ffn_up_exps_b = nullptr;
+    struct ggml_tensor * ffn_gate_exps_b_dup = nullptr;
+    struct ggml_tensor * ffn_down_exps_b_dup = nullptr;
+    struct ggml_tensor * ffn_up_exps_b_dup = nullptr;

    // ff shared expert (shexp)
    struct ggml_tensor * ffn_gate_inp_shexp = nullptr;