mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-22 22:24:11 +00:00
Step-3.5: llama.cpp compatibility changes (#1240)
* Step-3.5: llama.cpp compatibility changes * Also read rope_freq_base_train_swa from the GGUF
This commit is contained in:
@@ -131,6 +131,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
||||
{ LLM_KV_INTERLEAVE_MOE_LAYER_STEP, "%s.interleave_moe_layer_step" },
|
||||
{ LLM_KV_SWIGLU_LIMITS, "%s.swiglu_limits" },
|
||||
{ LLM_KV_SWIGLU_LIMITS_SHARED, "%s.swiglu_limits_shared" },
|
||||
{ LLM_KV_SWIGLU_CLAMP_EXP, "%s.swiglu_clamp_exp" },
|
||||
{ LLM_KV_SWIGLU_CLAMP_SHEXP, "%s.swiglu_clamp_shexp" },
|
||||
|
||||
{ LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
|
||||
{ LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
|
||||
|
||||
@@ -124,6 +124,8 @@ enum llm_kv {
|
||||
LLM_KV_INTERLEAVE_MOE_LAYER_STEP,
|
||||
LLM_KV_SWIGLU_LIMITS,
|
||||
LLM_KV_SWIGLU_LIMITS_SHARED,
|
||||
LLM_KV_SWIGLU_CLAMP_EXP,
|
||||
LLM_KV_SWIGLU_CLAMP_SHEXP,
|
||||
|
||||
LLM_KV_ATTENTION_HEAD_COUNT,
|
||||
LLM_KV_ATTENTION_HEAD_COUNT_KV,
|
||||
|
||||
@@ -1129,11 +1129,21 @@ void llm_load_hparams(
|
||||
if (hparams.expert_gating_func == LLM_EXPERT_GATING_FUNC_TYPE_NONE) {
|
||||
hparams.expert_gating_func = LLM_EXPERT_GATING_FUNC_SIGMOID;
|
||||
}
|
||||
ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
|
||||
ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
|
||||
ml.get_key(LLM_KV_ROPE_FREQ_BASE_SWA, hparams.rope_freq_base_train_swa);
|
||||
ml.get_key_or_arr(LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN, hparams.swa_layers, hparams.n_layer);
|
||||
ml.get_key_or_arr(LLM_KV_ROPE_DIMENSION_COUNT_PER_LAYER, hparams.rope_dim_per_layer, hparams.n_layer);
|
||||
ml.get_key_or_arr(LLM_KV_SWIGLU_LIMITS, hparams.swiglu_limits, hparams.n_layer);
|
||||
ml.get_key_or_arr(LLM_KV_SWIGLU_LIMITS_SHARED, hparams.swiglu_limits_shared, hparams.n_layer);
|
||||
if (!ml.get_key_or_arr(LLM_KV_ROPE_DIMENSION_COUNT_PER_LAYER, hparams.rope_dim_per_layer, hparams.n_layer, false)) {
|
||||
for (int i = 0; i < hparams.n_layer; ++i) {
|
||||
hparams.rope_dim_per_layer[i] = hparams.swa_layers[i] ? hparams.n_rot : hparams.n_rot/2;
|
||||
}
|
||||
}
|
||||
// The following two parameters: one of the two versions must be present in the GGUF
|
||||
if (!ml.get_key_or_arr(LLM_KV_SWIGLU_LIMITS, hparams.swiglu_limits, hparams.n_layer, false)) {
|
||||
ml.get_key_or_arr(LLM_KV_SWIGLU_CLAMP_EXP, hparams.swiglu_limits, hparams.n_layer, true);
|
||||
}
|
||||
if (!ml.get_key_or_arr(LLM_KV_SWIGLU_LIMITS_SHARED, hparams.swiglu_limits_shared, hparams.n_layer, false)) {
|
||||
ml.get_key_or_arr(LLM_KV_SWIGLU_CLAMP_SHEXP, hparams.swiglu_limits_shared, hparams.n_layer, true);
|
||||
}
|
||||
// Optional: Step35-only gating for applying rope scaling (HF: yarn_only_types).
|
||||
// Default is 3 (apply on all layers) if the key is absent.
|
||||
//ml.get_key(format("%s.rope.scaling.apply_mask", ml.get_arch_name().c_str()),
|
||||
@@ -1145,7 +1155,6 @@ void llm_load_hparams(
|
||||
hparams.rope_scaling_apply_mask, false);
|
||||
hparams.has_rope_freq_base_per_layer = ml.get_key_or_arr(LLM_KV_ROPE_FREQ_BASE_PER_LAYER,
|
||||
hparams.rope_freq_base_per_layer, hparams.n_layer, false);
|
||||
//type = LLM_TYPE_UNKNOWN; <--- what is this?
|
||||
} break;
|
||||
default: (void)0;
|
||||
}
|
||||
|
||||
@@ -70,7 +70,7 @@ struct llama_hparams {
|
||||
float rope_freq_base_train_swa;
|
||||
float rope_freq_scale_train;
|
||||
float rope_freq_scale_train_swa;
|
||||
uint32_t rope_scaling_apply_mask = 0x3;
|
||||
uint32_t rope_scaling_apply_mask = 0x1;
|
||||
bool has_rope_freq_base_per_layer = false;
|
||||
uint32_t n_ctx_orig_yarn;
|
||||
float rope_yarn_log_mul = 0.0f;
|
||||
|
||||
Reference in New Issue
Block a user