diff --git a/src/llama-build-context.cpp b/src/llama-build-context.cpp
index e10e6cfe..4bd36286 100644
--- a/src/llama-build-context.cpp
+++ b/src/llama-build-context.cpp
@@ -6634,9 +6634,9 @@ ggml_cgraph * llm_build_context::build_deepseek2() {
                 }
 
                 ggml_tensor * kq = ggml_mul_mat(ctx0, kv_cache, q);
-                if (kv_cache->ne[1] < 256) {
+                //if (kv_cache->ne[1] < 256) {
                     ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
-                }
+                //}
                 cb(kq, "kq", il);
 
                 if (!pp_opt) {
@@ -6653,6 +6653,7 @@ ggml_cgraph * llm_build_context::build_deepseek2() {
                 }
 
                 kqv_compressed = ggml_mul_mat(ctx0, kv_cache_trans, kq);
+                ggml_mul_mat_set_prec(kqv_compressed, GGML_PREC_F32);
                 cb(kqv_compressed, "kqv_compressed", il);
 
                 if (!pp_opt) {
diff --git a/src/llama-hparams.cpp b/src/llama-hparams.cpp
index 24d07a9a..d083225a 100644
--- a/src/llama-hparams.cpp
+++ b/src/llama-hparams.cpp
@@ -756,12 +756,20 @@ void llm_load_hparams(
         } break;
         case LLM_ARCH_DEEPSEEK2:
             {
+                ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
                 if (hparams.n_head_kv() == 1) {
                     int n_nead_kv = hparams.n_gqa();
-                    if (n_nead_kv%4 != 0 || hparams.n_embd_head_k != 576 || hparams.n_embd_head_v != 512 ||
+
+                    int expected_n_embd_head_k = hparams.n_embd_head_v + hparams.n_rot;
+                    if (n_nead_kv%4 != 0 || hparams.n_embd_head_k != expected_n_embd_head_k || (hparams.n_embd_head_v % 512) != 0 ||
                         hparams.n_rot != 64) {
                         printf("==========================================================================\n");
                         printf("Detected incompatible DeepSeek model without a known way to fixc it.\n");
+                        printf("n_nead_kv = %d\n", n_nead_kv);
+                        printf("hparams.n_embd_head_k = %d\n", hparams.n_embd_head_k);
+                        printf("hparams.n_embd_head_v = %d\n", hparams.n_embd_head_v);
+                        printf("hparams.n_lora_kv = %d\n", hparams.n_lora_kv);
+                        printf("hparams.n_rot = %d\n", hparams.n_rot);
                         printf("Consider making your own ik_llama.cpp compatible model or\n");
                         printf("ask the model provider to make one for you,\n\n");
                         printf("Sorry, uknown model => cannot fix it => bailing out\n");
@@ -781,7 +789,6 @@ void llm_load_hparams(
             if (!is_lite) {
                 ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
             }
-            ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
            ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
            ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
            ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
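
Note (not part of the patch): the llama-hparams.cpp change above replaces the hard-coded 576/512 head sizes with a derived check, relying on the MLA layout of DeepSeek2-style models in which the per-head K size is the per-head V size plus the rotary dimension. The standalone sketch below only illustrates that arithmetic; the concrete values (512 and 64) are the ones implied by the old hard-coded check and are used here as assumptions, not read from any model file.

#include <cstdio>

int main() {
    // Hypothetical values matching the old hard-coded check (576/512, n_rot = 64).
    int n_embd_head_v = 512;                   // per-head size of the value projection
    int n_rot         = 64;                    // rotary (RoPE) dimension
    int n_embd_head_k = n_embd_head_v + n_rot; // what the patched check now expects: 576
    printf("expected n_embd_head_k = %d\n", n_embd_head_k);
    return 0;
}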