Make HuiHui work

Kawrakow
2026-01-29 15:31:23 +00:00
parent 686fd1ebec
commit 0f6cdd4aec
2 changed files with 12 additions and 4 deletions


@@ -6634,9 +6634,9 @@ ggml_cgraph * llm_build_context::build_deepseek2() {
 }
 ggml_tensor * kq = ggml_mul_mat(ctx0, kv_cache, q);
-if (kv_cache->ne[1] < 256) {
+//if (kv_cache->ne[1] < 256) {
     ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
-}
+//}
 cb(kq, "kq", il);
 if (!pp_opt) {
@@ -6653,6 +6653,7 @@ ggml_cgraph * llm_build_context::build_deepseek2() {
 }
 kqv_compressed = ggml_mul_mat(ctx0, kv_cache_trans, kq);
+ggml_mul_mat_set_prec(kqv_compressed, GGML_PREC_F32);
 cb(kqv_compressed, "kqv_compressed", il);
 if (!pp_opt) {
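Both hunks above force F32 accumulation for the MLA matmuls: the first makes the existing GGML_PREC_F32 override on kq unconditional by commenting out the size guard, and the second adds the same override to kqv_compressed. Below is a minimal, self-contained sketch of the ggml pattern involved; it is not from this commit, and the context size, tensor shapes, and types are invented.

// Sketch (shapes invented): ggml_mul_mat_set_prec() tags a matmul node so
// the backend accumulates it in F32 even when the inputs are half precision.
#include "ggml.h"

int main() {
    ggml_init_params params = { /*.mem_size   =*/ 16*1024*1024,
                                /*.mem_buffer =*/ nullptr,
                                /*.no_alloc   =*/ false };
    ggml_context * ctx = ggml_init(params);

    // stand-ins for kv_cache and q in build_deepseek2()
    ggml_tensor * kv_cache = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 576, 128);
    ggml_tensor * q        = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 576, 8);

    ggml_tensor * kq = ggml_mul_mat(ctx, kv_cache, q);
    // After this commit the override is unconditional; previously it was
    // applied only when kv_cache->ne[1] < 256.
    ggml_mul_mat_set_prec(kq, GGML_PREC_F32);

    ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, kq);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/1);

    ggml_free(ctx);
    return 0;
}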


@@ -756,12 +756,20 @@ void llm_load_hparams(
 } break;
 case LLM_ARCH_DEEPSEEK2:
 {
+    ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
     if (hparams.n_head_kv() == 1) {
         int n_head_kv = hparams.n_gqa();
-        if (n_head_kv%4 != 0 || hparams.n_embd_head_k != 576 || hparams.n_embd_head_v != 512 ||
+        int expected_n_embd_head_k = hparams.n_embd_head_v + hparams.n_rot;
+        if (n_head_kv%4 != 0 || hparams.n_embd_head_k != expected_n_embd_head_k || (hparams.n_embd_head_v % 512) != 0 ||
             hparams.n_rot != 64) {
             printf("==========================================================================\n");
             printf("Detected incompatible DeepSeek model without a known way to fix it.\n");
             printf("n_head_kv = %d\n", n_head_kv);
             printf("hparams.n_embd_head_k = %d\n", hparams.n_embd_head_k);
             printf("hparams.n_embd_head_v = %d\n", hparams.n_embd_head_v);
+            printf("hparams.n_lora_kv = %d\n", hparams.n_lora_kv);
             printf("hparams.n_rot = %d\n", hparams.n_rot);
             printf("Consider making your own ik_llama.cpp compatible model or\n");
             printf("ask the model provider to make one for you.\n\n");
             printf("Sorry, unknown model => cannot fix it => bailing out\n");
@@ -781,7 +789,6 @@ void llm_load_hparams(
 if (!is_lite) {
     ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
 }
-ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
 ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);