API: Ignore add_bos_token in chat completions

When fetching special tokens from the model, don't factor in the
add_bos_token and ban_eos_token parameters as switches.

In addition, change the internal handling of add_bos_token to an optional
boolean. This allows us to fall back to the model's setting when deciding
whether or not to add the BOS token, especially for chat completions.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
kingbri
2025-05-01 22:51:15 -04:00
parent 3960612d38
commit aa657fa6e9
4 changed files with 13 additions and 17 deletions

View File

@@ -123,7 +123,7 @@ class BaseModelContainer(abc.ABC):
pass
@abc.abstractmethod
def get_special_tokens(self, **kwargs) -> Dict[str, Any]:
def get_special_tokens(self) -> Dict[str, Any]:
"""
Gets special tokens used by the model/tokenizer.

View File

@@ -843,12 +843,10 @@ class ExllamaV2Container(BaseModelContainer):
decode_special_tokens=unwrap(kwargs.get("decode_special_tokens"), True),
)[0]
def get_special_tokens(self):
    """Return the model tokenizer's special tokens as a plain dict.

    The tokens are reported exactly as the tokenizer defines them; the
    add_bos_token / ban_eos_token generation parameters no longer act as
    switches here (callers decide how to use BOS/EOS themselves).

    Returns:
        dict with keys "bos_token", "eos_token", "pad_token", "unk_token"
        mapped to the corresponding tokenizer attributes (which may be
        None/empty depending on the loaded model — TODO confirm).
    """
    return {
        "bos_token": self.tokenizer.bos_token,
        "eos_token": self.tokenizer.eos_token,
        "pad_token": self.tokenizer.pad_token,
        "unk_token": self.tokenizer.unk_token,
    }
@@ -1242,7 +1240,7 @@ class ExllamaV2Container(BaseModelContainer):
) and gen_settings.token_repetition_range == -1
stop_conditions = params.stop
add_bos_token = params.add_bos_token
add_bos_token = unwrap(params.add_bos_token, True)
ban_eos_token = params.ban_eos_token
# Fetch EOS tokens from generation_config if they exist