gpt-oss: duplicate expert biases when necessary (#829)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-10-14 14:38:40 +03:00
committed by GitHub
parent 2b71974af9
commit ba9fefb73d
4 changed files with 47 additions and 18 deletions

View File

@@ -178,6 +178,9 @@ struct llama_layer {
struct ggml_tensor * ffn_gate_exps_b = nullptr;
struct ggml_tensor * ffn_down_exps_b = nullptr;
struct ggml_tensor * ffn_up_exps_b = nullptr;
struct ggml_tensor * ffn_gate_exps_b_dup = nullptr;
struct ggml_tensor * ffn_down_exps_b_dup = nullptr;
struct ggml_tensor * ffn_up_exps_b_dup = nullptr;
// ff shared expert (shexp)
struct ggml_tensor * ffn_gate_inp_shexp = nullptr;