mirror of https://github.com/theroyallab/tabbyAPI.git
API: Re-add BOS token stripping in template render
Matching YALS, if the model has add_bos_token enabled, then remove an extra
BOS token at the start of the prompt. This usually happens with misconfigured
templates such as Llama 3.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
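For context, a minimal, self-contained sketch of the stripping rule this commit re-adds. The helper strip_leading_bos and the literal <|begin_of_text|> token are illustrative stand-ins; tabbyAPI reads these values from the model container and template vars rather than taking them as arguments:

    # Hypothetical standalone illustration of the double-BOS guard.
    def strip_leading_bos(prompt: str, bos_token: str | None, add_bos_token: bool) -> str:
        """Drop a textual BOS at the start of a rendered prompt when the
        tokenizer would prepend another one during encoding."""
        if bos_token and add_bos_token and prompt.startswith(bos_token):
            return prompt.removeprefix(bos_token)
        return prompt

    # A misconfigured template (e.g. Llama 3) bakes the BOS into the rendered text:
    rendered = "<|begin_of_text|>Hello"
    assert strip_leading_bos(rendered, "<|begin_of_text|>", add_bos_token=True) == "Hello"
    # With add_bos_token disabled, the prompt is left untouched:
    assert strip_leading_bos(rendered, "<|begin_of_text|>", add_bos_token=False) == rendered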
@@ -286,6 +286,16 @@ async def apply_chat_template(
             "add_generation_prompt is False"
         )
 
+    # Removes the starting BOS token if the model adds one
+    # This is to prevent add_bos_token from adding multiple bos tokens
+    bos_token = template_vars.get("bos_token")
+    if (
+        bos_token
+        and model.container.hf_model.add_bos_token()
+        and prompt.startswith(bos_token)
+    ):
+        prompt = prompt.removeprefix(bos_token)
+
     # Add template metadata
     await _append_template_metadata(data, template_vars)
 