mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-14 15:57:27 +00:00
Model: Add TokenizerConfig stub and add_eos_token fallback
This stub fetches the add_eos_token field from the HF tokenizer config. Ideally, this should live in the backend rather than in tabby.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
@@ -239,6 +239,7 @@ async def find_prompt_template(template_name, model_dir: pathlib.Path):
|
||||
]
|
||||
|
||||
# Add lookup from prompt template name if provided
|
||||
# TODO: Possibly link to the TokenizerConfig class
|
||||
if template_name:
|
||||
find_template_functions[:0] = [
|
||||
lambda: PromptTemplate.from_file(pathlib.Path("templates") / template_name),
|
||||
|
||||
@@ -53,3 +53,23 @@ class HuggingFaceConfig(BaseModel):
|
||||
contents = await hf_config_json.read()
|
||||
hf_config_dict = json.loads(contents)
|
||||
return cls.model_validate(hf_config_dict)
|
||||
|
||||
|
||||
class TokenizerConfig(BaseModel):
    """
    Abridged mirror of HuggingFace's tokenizer configuration.

    Only the fields tabby actually consumes are modeled; everything else
    in the on-disk JSON is ignored by pydantic validation.
    """

    # Whether the tokenizer prepends a BOS token; None means the field was
    # absent from tokenizer_config.json.
    add_bos_token: Optional[bool] = None

    @classmethod
    async def from_file(cls, model_directory: pathlib.Path):
        """Build a TokenizerConfig from <model_directory>/tokenizer_config.json.

        Raises FileNotFoundError if the config file does not exist and
        pydantic's ValidationError if the JSON fails validation.
        """

        config_path = model_directory / "tokenizer_config.json"

        async with aiofiles.open(
            config_path, "r", encoding="utf8"
        ) as config_file:
            raw_text = await config_file.read()

        parsed = json.loads(raw_text)
        return cls.model_validate(parsed)
|
||||
|
||||
Reference in New Issue
Block a user