mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-01-26 09:09:50 +00:00
This is better
This commit is contained in:
@@ -786,21 +786,17 @@ void llm_load_hparams(
|
||||
ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
|
||||
ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
|
||||
ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false);
|
||||
hparams.expert_gating_func = LLM_EXPERT_GATING_FUNC_TYPE_NONE;
|
||||
ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
|
||||
if (hparams.n_layer == 47 || hparams.n_layer == 48) {
|
||||
if (hparams.expert_gating_func != LLM_EXPERT_GATING_FUNC_SIGMOID) {
|
||||
printf("============== Corrected experts gating function from %s to %s\n",
|
||||
llm_expert_gating_func_name(llm_expert_gating_func_type(hparams.expert_gating_func)),
|
||||
llm_expert_gating_func_name(LLM_EXPERT_GATING_FUNC_SIGMOID));
|
||||
hparams.expert_gating_func = LLM_EXPERT_GATING_FUNC_SIGMOID;
|
||||
}
|
||||
} else {
|
||||
if (hparams.expert_gating_func == LLM_EXPERT_GATING_FUNC_TYPE_NONE) {
|
||||
// for compatibility with existing DeepSeek V2 and V2.5 GGUFs
|
||||
// that have no expert_gating_func model parameter set
|
||||
printf("============== Missing experts gating function -> set to SOFTMAX\n");
|
||||
hparams.expert_gating_func = LLM_EXPERT_GATING_FUNC_SOFTMAX;
|
||||
}
|
||||
if (hparams.expert_gating_func == LLM_EXPERT_GATING_FUNC_TYPE_NONE) {
|
||||
// Some models don't have the experts gating function recorded in the GGUF
|
||||
// Hence, we make the LLM_KV_EXPERT_GATING_FUNC entry optional and set it here if it is missing.
|
||||
// DeepSeek models normally have softmax as gating function, but there is GLM-4.7-Flash now
|
||||
// (identified via number of layers being 47 or 48), which uses sigmoid.
|
||||
hparams.expert_gating_func = hparams.n_layer == 47 || hparams.n_layer == 48 ?
|
||||
LLM_EXPERT_GATING_FUNC_SIGMOID : LLM_EXPERT_GATING_FUNC_SOFTMAX;
|
||||
printf("================= Missing experts gating function -> set to %s\n",
|
||||
llm_expert_gating_func_name(llm_expert_gating_func_type(hparams.expert_gating_func)));
|
||||
}
|
||||
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user