diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 32c31e8d..e5ff92da 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2340,7 +2340,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

         // @ngxson : quick hack for gpt-oss, always render these tokens
         for (const auto & t : token_to_id) {
-            if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>") {
+            if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") {
                 id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
             }
         }
@@ -2387,6 +2387,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

             if (has_return && has_call && has_end) {
                 special_eog_ids.erase(end_id);
+                id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
                 LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
             }
         }
@@ -2468,7 +2469,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
     // set attributes by model/tokenizer/architecture name
     if (false
             || _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
-            || _contains_any(general_arch, {"nomic-bert-moe"})
+            || _contains_any(general_arch, {"nomic-bert-moe", "jina-bert-v3"})
             ) {
         if (token_to_id.count("<mask>") == 0) {
             LLAMA_LOG_WARN("%s: Mask token is missing in vocab, please reconvert model!\n", __func__);