Deepseek V3 support added (#176)

Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com>
This commit is contained in:
saood06
2025-01-23 10:24:10 -06:00
committed by GitHub
parent 6d1b4adaac
commit 5c0a01bdaf
9 changed files with 136 additions and 5 deletions

View File

@@ -367,6 +367,13 @@ struct llm_tokenizer_bpe {
"\\p{N}+",
};
break;
// Split patterns selected for the DEEPSEEK3_LLM pre-tokenizer type.
// NOTE(review): how regex_exprs is consumed (order of application, regex engine)
// is not visible in this hunk -- confirm against the code that uses it.
case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM:
regex_exprs = {
// A run of one to three Unicode number characters (\p{N}).
"\\p{N}{1,3}",
// One or more characters from the ranges U+4E00..U+9FA5, U+3040..U+309F and
// U+30A0..U+30FF (CJK ideographs, Hiragana block, Katakana block).
"[一-龥぀-ゟ゠-ヿ]+",
// Alternation of six sub-patterns:
//   1. one ASCII punctuation character from the listed set, then one or more ASCII letters;
//   2. an optional single character that is not CR, LF, a letter, punctuation or a symbol,
//      then one or more letter (\p{L}) or mark (\p{M}) characters;
//   3. an optional space, one or more punctuation/symbol characters, then any trailing CR/LF;
//   4. optional whitespace followed by one or more CR/LF characters;
//   5. whitespace that is not followed by a non-whitespace character (negative lookahead);
//   6. any other run of whitespace.
"[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
};
break;
case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER:
regex_exprs = {
"[\r\n]",