Mirror of https://github.com/ikawrakow/ik_llama.cpp.git (synced 2026-04-29 02:41:47 +00:00)
Add Falcon-Edge support (#555)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -393,6 +393,20 @@ struct llm_tokenizer_bpe {
|
||||
"[0-9][0-9][0-9]",
|
||||
};
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_FALCON_3:
|
||||
regex_exprs = {
|
||||
"[\\p{P}\\$\\+<=>\\^~\\|`]+",
|
||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
||||
"[0-9]",
|
||||
};
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_FALCON_E:
|
||||
regex_exprs = {
|
||||
"[\\p{P}\\$\\+<=>\\^~\\|`]+",
|
||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
||||
"[0-9]",
|
||||
};
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_STARCODER:
|
||||
case LLAMA_VOCAB_PRE_TYPE_REFACT:
|
||||
case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
|
||||
|
||||
Reference in New Issue
Block a user