mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-29 10:51:51 +00:00
Ported kimi-k2 support from llama.cpp (#609)
Original patch by @gabriellarson: https://github.com/ggml-org/llama.cpp/pull/14654 Co-authored-by: anikifoss <anikifoss>
This commit is contained in:
committed by
GitHub
parent
255c22046b
commit
f5353047ef
@@ -464,6 +464,13 @@ struct llm_tokenizer_bpe {
|
||||
"[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
||||
};
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_KIMI_K2:
|
||||
regex_exprs = {
|
||||
// K2 trigger pattern - this will activate the custom K2 handler in unicode.cpp
|
||||
// The custom handler implements all K2 patterns with proper Han character exclusion
|
||||
"\\p{Han}+",
|
||||
};
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_SUPERBPE:
|
||||
regex_exprs = {
|
||||
"\\p{N}+",
|
||||
|
||||
Reference in New Issue
Block a user