mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-06 03:50:08 +00:00
Chat fixes
This commit is contained in:
@@ -2340,7 +2340,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
||||
|
||||
// @ngxson : quick hack for gpt-oss, always render these tokens
|
||||
for (const auto & t : token_to_id) {
|
||||
if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>") {
|
||||
if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") {
|
||||
id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
|
||||
}
|
||||
}
|
||||
@@ -2387,6 +2387,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
||||
|
||||
if (has_return && has_call && has_end) {
|
||||
special_eog_ids.erase(end_id);
|
||||
id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
|
||||
LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
|
||||
}
|
||||
}
|
||||
@@ -2468,7 +2469,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
||||
// set attributes by model/tokenizer/architecture name
|
||||
if (false
|
||||
|| _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
|
||||
|| _contains_any(general_arch, {"nomic-bert-moe"})
|
||||
|| _contains_any(general_arch, {"nomic-bert-moe", "jina-bert-v3"})
|
||||
) {
|
||||
if (token_to_id.count("<mask>") == 0) {
|
||||
LLAMA_LOG_WARN("%s: Mask token is missing in vocab, please reconvert model!\n", __func__);
|
||||
|
||||
Reference in New Issue
Block a user