kimi-k2 convert script and chat template (#612)

* convert_hf_to_gguf for Kimi-K2-Instruct

Adapt mainline `PR14653` for the tokenizer while keeping the proper MLA
tensors. Tested with the following workflow: upcast the fp8 safetensors
to bf16 safetensors using DeepSeek's fp8_cast_bf16.py with triton-cpu,
then run this convert_hf_to_gguf script on the result.

* Add Kimi-K2 chat template

moonshotai/Kimi-K2-Instruct

https://github.com/ikawrakow/ik_llama.cpp/pull/609#issuecomment-3071259454

* kimi-k2: append the assistant prompt (add_ass) to the template to get a response
This commit is contained in:
ubergarm
2025-07-15 13:54:04 -04:00
committed by GitHub
parent 2081b3fccb
commit 13b2f19372
3 changed files with 77 additions and 0 deletions

View File

@@ -1695,6 +1695,7 @@ enum llm_chat_template {
LLM_CHAT_TEMPLATE_BITNET,
LLM_CHAT_TEMPLATE_DOTS1,
LLM_CHAT_TEMPLATE_HUNYUAN_MOE,
LLM_CHAT_TEMPLATE_KIMI_K2,
LLM_CHAT_TEMPLATE_UNKNOWN,
};
@@ -1733,6 +1734,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
{ "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
{ "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
{ "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE },
{ "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 },
{ "bitnet", LLM_CHAT_TEMPLATE_BITNET },
};
@@ -23270,6 +23272,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
return LLM_CHAT_TEMPLATE_DOTS1;
} else if (tmpl_contains("<|startoftext|>") && tmpl_contains("<|extra_4|>")) {
return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
} else if (tmpl_contains("<|im_middle|>") && tmpl_contains("<|im_end|>")) {
return LLM_CHAT_TEMPLATE_KIMI_K2;
}
return LLM_CHAT_TEMPLATE_UNKNOWN;
}
@@ -23715,6 +23719,21 @@ static int32_t llama_chat_apply_template_internal(
ss << "<|startoftext|>" << message->content << "<|extra_0|>";
}
}
} else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
// moonshotai/Kimi-K2-Instruct
// Every turn is rendered as: <role token> role-name <|im_middle|> content <|im_end|>
for (auto message : chat) {
    std::string role(message->role);
    if (role == "system") {
        ss << "<|im_system|>system<|im_middle|>";
    } else if (role == "assistant") {
        // assistant turns must carry the assistant token (previously emitted as user)
        ss << "<|im_assistant|>assistant<|im_middle|>";
    } else if (role == "tool") {
        // the official template renders tool results under the system token
        ss << "<|im_system|>tool<|im_middle|>";
    } else {
        // "user" and any unrecognized role are rendered as a user turn
        ss << "<|im_user|>user<|im_middle|>";
    }
    ss << message->content << "<|im_end|>";
}
if (add_ass) {
    // open an assistant turn so that the model generates the reply
    ss << "<|im_assistant|>assistant<|im_middle|>";
}
} else {
// template not supported
return -1;