Model: Add TokenizerConfig stub and add_bos_token fallback
This stub fetches the add_bos_token field from the HF tokenizer config. Ideally, this should live in the backend rather than in tabby.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
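For context, the fallback boils down to reading one boolean out of the model's tokenizer_config.json. A minimal standalone sketch of that idea (the sample file contents below are hypothetical, not from any particular model):

import json

# Hypothetical tokenizer_config.json excerpt; real HF tokenizer configs
# carry many more fields, but add_bos_token is the one this stub reads.
sample = '{"add_bos_token": false, "model_max_length": 32768}'

# dict.get yields None when the key is absent, leaving the decision
# to a downstream default instead of hard-coding one here.
add_bos_token = json.loads(sample).get("add_bos_token")
print(add_bos_token)  # -> False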
@@ -50,7 +50,7 @@ from common.health import HealthManager
 from common.multimodal import MultimodalEmbeddingWrapper
 from common.sampling import BaseSamplerRequest
 from common.templating import PromptTemplate, find_prompt_template
-from common.transformers_utils import GenerationConfig
+from common.transformers_utils import GenerationConfig, TokenizerConfig
 from common.utils import calculate_rope_alpha, coalesce, unwrap
 from endpoints.core.types.model import ModelCard, ModelCardParameters
 
@@ -80,6 +80,7 @@ class ExllamaV2Container(BaseModelContainer):
     draft_cache_mode: str = "FP16"
     max_batch_size: Optional[int] = None
     generation_config: Optional[GenerationConfig] = None
+    tokenizer_config: Optional[TokenizerConfig] = None
 
     # GPU split vars
     gpu_split: List[float] = []
@@ -130,7 +131,7 @@ class ExllamaV2Container(BaseModelContainer):
         if generation_config_path.exists():
             try:
                 self.generation_config = await GenerationConfig.from_file(
-                    generation_config_path.parent
+                    model_directory
                 )
             except Exception:
                 logger.error(traceback.format_exc())
@@ -138,6 +139,19 @@ class ExllamaV2Container(BaseModelContainer):
                     "Skipping generation config load because of an unexpected error."
                 )
 
+        # Load tokenizer config overrides
+        tokenizer_config_path = model_directory / "tokenizer_config.json"
+        if tokenizer_config_path.exists():
+            try:
+                self.tokenizer_config = await TokenizerConfig.from_file(
+                    model_directory
+                )
+            except Exception:
+                logger.error(traceback.format_exc())
+                logger.warning(
+                    "Skipping tokenizer config load because of an unexpected error."
+                )
+
         # Set vision state and error if vision isn't supported on the current model
         self.use_vision = unwrap(kwargs.get("vision"), False)
         if self.use_vision and not self.config.vision_model_type:
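The tokenizer config load mirrors the generation config load above it: the file is optional, and a parse or validation failure is logged and skipped rather than aborting the model load. A rough sketch of that tolerate-and-continue pattern, using only the standard library (load_optional_config is a hypothetical helper, not tabby's API):

import json
import logging
import pathlib
import traceback

logger = logging.getLogger(__name__)

def load_optional_config(model_directory: pathlib.Path, name: str):
    """Return the parsed config dict, or None if missing or unreadable."""
    config_path = model_directory / name
    if not config_path.exists():
        return None
    try:
        return json.loads(config_path.read_text(encoding="utf8"))
    except Exception:
        logger.error(traceback.format_exc())
        logger.warning(f"Skipping {name} load because of an unexpected error.")
        return None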
@@ -1240,9 +1254,17 @@ class ExllamaV2Container(BaseModelContainer):
         ) and gen_settings.token_repetition_range == -1
 
         stop_conditions = params.stop
-        add_bos_token = unwrap(params.add_bos_token, True)
         ban_eos_token = params.ban_eos_token
 
+
+        print(self.tokenizer_config.add_bos_token)
+        # Set add_bos_token for generation
+        add_bos_token = coalesce(
+            params.add_bos_token, self.tokenizer_config.add_bos_token, True
+        )
+
+        print(add_bos_token)
+
         # Fetch EOS tokens from generation_config if they exist
         eos_tokens = (
             self.generation_config.eos_tokens()
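Swapping unwrap for coalesce is what creates the fallback chain: an explicit request value wins, then the tokenizer config's add_bos_token, then True. A self-contained sketch of that precedence (assuming coalesce returns its first non-None argument, which is how the call above reads; this is not tabby's implementation):

from typing import Optional

def coalesce(*args):
    """Return the first argument that is not None."""
    return next((arg for arg in args if arg is not None), None)

request_value: Optional[bool] = None   # client left add_bos_token unset
config_value: Optional[bool] = False   # tokenizer_config.json disables BOS

print(coalesce(request_value, config_value, True))  # -> False (config wins)
print(coalesce(True, config_value, True))           # -> True (request wins)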
@@ -239,6 +239,7 @@ async def find_prompt_template(template_name, model_dir: pathlib.Path):
     ]
 
     # Add lookup from prompt template name if provided
+    # TODO: Possibly link to the TokenizerConfig class
     if template_name:
         find_template_functions[:0] = [
             lambda: PromptTemplate.from_file(pathlib.Path("templates") / template_name),
@@ -53,3 +53,23 @@ class HuggingFaceConfig(BaseModel):
         contents = await hf_config_json.read()
         hf_config_dict = json.loads(contents)
         return cls.model_validate(hf_config_dict)
+
+
+class TokenizerConfig(BaseModel):
+    """
+    An abridged version of HuggingFace's tokenizer config.
+    """
+
+    add_bos_token: Optional[bool] = None
+
+    @classmethod
+    async def from_file(cls, model_directory: pathlib.Path):
+        """Create an instance from a tokenizer config file."""
+
+        tokenizer_config_path = model_directory / "tokenizer_config.json"
+        async with aiofiles.open(
+            tokenizer_config_path, "r", encoding="utf8"
+        ) as tokenizer_config_json:
+            contents = await tokenizer_config_json.read()
+            tokenizer_config_dict = json.loads(contents)
+            return cls.model_validate(tokenizer_config_dict)
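One detail that makes the abridged model work: pydantic's BaseModel ignores unknown fields by default, so model_validate accepts a complete HF tokenizer_config.json while keeping only add_bos_token. A hedged usage sketch (synchronous, and with made-up config contents, to stay self-contained):

from typing import Optional
from pydantic import BaseModel

class TokenizerConfig(BaseModel):
    # The only field tabby cares about; unknown keys are ignored.
    add_bos_token: Optional[bool] = None

# A real tokenizer_config.json carries many more keys than this.
config_dict = {
    "add_bos_token": True,
    "add_eos_token": False,
    "model_max_length": 32768,
}

config = TokenizerConfig.model_validate(config_dict)
print(config.add_bos_token)  # -> True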