mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-06 03:50:08 +00:00
GLM-5 support (#1268)
This commit is contained in:
@@ -761,7 +761,7 @@ void llm_load_hparams(
|
||||
if (n_nead_kv%4 != 0 || hparams.n_embd_head_k != 576 || hparams.n_embd_head_v != 512 ||
|
||||
hparams.n_rot != 64) {
|
||||
printf("==========================================================================\n");
|
||||
printf("Detected incompatible DeepSeek model without a known way to fixc it.\n");
|
||||
printf("Detected incompatible DeepSeek model without a known way to fix it.\n");
|
||||
printf("Consider making your own ik_llama.cpp compatible model or\n");
|
||||
printf("ask the model provider to make one for you,\n\n");
|
||||
printf("Sorry, uknown model => cannot fix it => bailing out\n");
|
||||
@@ -1157,6 +1157,67 @@ void llm_load_hparams(
|
||||
hparams.rope_freq_base_per_layer, hparams.n_layer, false);
|
||||
GGML_ASSERT(hparams.has_rope_freq_base_per_layer || have_rfb_train_swa);
|
||||
} break;
|
||||
case LLM_ARCH_GLM_DSA:
    {
        // GLM with DeepSeek-style sparse attention (DSA): load hparams from GGUF metadata.
        ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH,  hparams.n_ff_exp);
        ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
        ml.get_key_or_arr(LLM_KV_ROPE_DIMENSION_SECTIONS, hparams.rope_sections, 4, false);

        // MoE parameters
        ml.get_key(LLM_KV_EXPERT_COUNT,              hparams.n_expert);
        ml.get_key(LLM_KV_EXPERT_USED_COUNT,         hparams.n_expert_used);
        ml.get_key(LLM_KV_EXPERT_SHARED_COUNT,       hparams.n_expert_shared);
        ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead, false);
        ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE,      hparams.expert_weights_scale);
        ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM,       hparams.expert_weights_norm, false);

        // DeepSeek MLA parameters.
        // Note: the original code re-read LLM_KV_EXPERT_FEED_FORWARD_LENGTH and
        // LLM_KV_EXPERT_SHARED_COUNT here a second time; those redundant reads
        // have been removed (same required keys, same destinations).
        ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK,  hparams.n_lora_q);
        ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);

        // DSA (sparse attention indexer) parameters
        ml.get_key(LLM_KV_ATTENTION_INDEXER_HEAD_COUNT, hparams.indexer_n_head);
        ml.get_key(LLM_KV_ATTENTION_INDEXER_KEY_LENGTH, hparams.indexer_head_size);
        ml.get_key(LLM_KV_ATTENTION_INDEXER_TOP_K,      hparams.indexer_top_k);

        // Expert gating function; defaults to sigmoid when the key is absent
        // (matches GLM-4.5-family convention).
        ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
        if (hparams.expert_gating_func == LLM_EXPERT_GATING_FUNC_TYPE_NONE) {
            hparams.expert_gating_func = LLM_EXPERT_GATING_FUNC_SIGMOID;
        }

        // NextN/MTP parameters
        ml.get_key(LLM_KV_NEXTN_PREDICT_LAYERS, hparams.nextn_predict_layers, false);

        // MTP layers carry no KV cache. TODO: revisit once MTP is implemented.
        hparams.n_layer_kv_from_start = hparams.n_layer - hparams.nextn_predict_layers;

        switch (hparams.n_layer) {
            case 79: model.type = MODEL_744B_A40B; break;
            default: model.type = MODEL_UNKNOWN;
        }

        // A single KV head signals a mainline llama.cpp MLA-style conversion;
        // adjust the head layout to the ik_llama.cpp representation.
        if (hparams.n_head_kv() == 1) {
            int n_head_kv = hparams.n_gqa();  // was misspelled 'n_nead_kv'
            if (n_head_kv%4 != 0 || hparams.n_embd_head_k != 576 || hparams.n_embd_head_v != 512 ||
                hparams.n_rot != 64) {
                printf("==========================================================================\n");
                printf("Detected incompatible DeepSeek model without a known way to fix it.\n");
                printf("Sorry, unknown model => cannot fix it => bailing out\n");
                printf("==========================================================================\n");
                GGML_ABORT("Fatal error");
            }
            printf("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
            for (auto& item : hparams.n_head_kv_arr) item = n_head_kv;
            // Defaults for the MLA head sizes; overridden by metadata when present.
            hparams.n_embd_head_k = 192;
            hparams.n_embd_head_v = 128;
            ml.get_key(LLM_KV_ATTENTION_KEY_LENGTH_MLA,   hparams.n_embd_head_k);
            ml.get_key(LLM_KV_ATTENTION_VALUE_LENGTH_MLA, hparams.n_embd_head_v);
        }
    } break;
|
||||
default: (void)0;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user