mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-14 15:57:27 +00:00
API: Re-add BOS token stripping in template render
Matching YALS, if the model has add_bos_token enabled, then remove an extra BOS token at the start of the prompt. This usually happens with misconfigured templates such as Llama 3. Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
@@ -286,6 +286,16 @@ async def apply_chat_template(
             "add_generation_prompt is False"
         )

+    # Removes the starting BOS token if the model adds one
+    # This is to prevent add_bos_token from adding multiple bos tokens
+    bos_token = template_vars.get("bos_token")
+    if (
+        bos_token
+        and model.container.hf_model.add_bos_token()
+        and prompt.startswith(bos_token)
+    ):
+        prompt = prompt.removeprefix(bos_token)
+
     # Add template metadata
     await _append_template_metadata(data, template_vars)
Reference in New Issue
Block a user