Add Falcon3 pre-tokenizer (same as llama3)

This commit is contained in:
Iwan Kawrakow
2025-01-10 13:36:58 +02:00
parent 3e6851621c
commit 712f348f85

View File

@@ -5552,7 +5552,8 @@ static void llm_load_vocab(
} else if (
tokenizer_pre == "llama3" ||
tokenizer_pre == "llama-v3" ||
tokenizer_pre == "llama-bpe") {
tokenizer_pre == "llama-bpe"||
tokenizer_pre == "falcon3") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
vocab.tokenizer_ignore_merges = true;
vocab.tokenizer_add_bos = true;