Files
tabbyAPI/OAI/types/model.py
kingbri ce2602df9a Model: Fix max seq len handling
Previously, the max sequence length was overridden by the user's
config and never took the model's config.json into account.

Now, set the default to 4096, but include config.prepare when
selecting the max sequence length. The yaml and API request
now serve as overrides rather than parameters.

Signed-off-by: kingbri <bdashore3@proton.me>
2023-12-19 23:37:52 -05:00

58 lines
2.0 KiB
Python

from pydantic import BaseModel, Field, ConfigDict
from time import time
from typing import List, Optional
from gen_logging import LogConfig
# Parameter set that can be attached to a ModelCard. Every field is optional
# and falls back to the default declared here when it is not supplied.
class ModelCardParameters(BaseModel):
    # None is acceptable here: a max seq len is guaranteed to be fetched
    # from model_container.
    max_seq_len: Optional[int] = None

    rope_scale: Optional[float] = 1.0
    rope_alpha: Optional[float] = 1.0
    cache_mode: Optional[str] = "FP16"
    prompt_template: Optional[str] = None
    num_experts_per_token: Optional[int] = None

    # String forward reference, because ModelCard is declared further down
    # in this module.
    draft: Optional["ModelCard"] = None
# A single model entry: identifier, object tag, creation timestamp and owner,
# plus optional logging configuration and model parameters.
class ModelCard(BaseModel):
    id: str = "test"
    object: str = "model"

    # The factory runs for each instance that does not supply a value, so the
    # timestamp is the current time (whole seconds) at construction.
    created: int = Field(default_factory=lambda: int(time()))

    owned_by: str = "tabbyAPI"
    logging: Optional[LogConfig] = None
    parameters: Optional[ModelCardParameters] = None
# List wrapper around ModelCard entries.
class ModelList(BaseModel):
    object: str = "list"

    # default_factory gives each instance its own new, empty list.
    data: List[ModelCard] = Field(default_factory=list)
# Settings for loading a draft model. Only the model name is required.
class DraftModelLoadRequest(BaseModel):
    draft_model_name: str
    draft_rope_scale: Optional[float] = 1.0

    # Left as None unless provided; per the field description the value is
    # then calculated automatically.
    draft_rope_alpha: Optional[float] = Field(
        default=None,
        description="Automatically calculated if not present",
    )
# Settings for loading a model. Only `name` is required; every other field
# carries a default.
# TODO: Unify this with ModelCardParams
class ModelLoadRequest(BaseModel):
    name: str

    # Stays None here; the max seq len is defaulted when loading the model
    # itself.
    max_seq_len: Optional[int] = None

    gpu_split_auto: Optional[bool] = True
    # default_factory gives each request its own new, empty list.
    gpu_split: Optional[List[float]] = Field(default_factory=list)

    rope_scale: Optional[float] = 1.0
    # Left as None unless provided; per the field description the value is
    # then calculated automatically.
    rope_alpha: Optional[float] = Field(
        default=None,
        description="Automatically calculated if not present",
    )

    no_flash_attention: Optional[bool] = False
    # low_mem: Optional[bool] = False
    cache_mode: Optional[str] = "FP16"
    prompt_template: Optional[str] = None
    num_experts_per_token: Optional[int] = None

    # Optional nested settings for a draft model.
    draft: Optional[DraftModelLoadRequest] = None
# Response describing the state of a model load.
class ModelLoadResponse(BaseModel):
    # An empty protected-namespace list avoids pydantic's namespace warning
    # (presumably raised for the "model_"-prefixed field below - confirm).
    model_config = ConfigDict(protected_namespaces=[])

    model_type: str = "model"

    # NOTE(review): module / modules look like a current index and a total
    # count - confirm against the code that builds this response.
    module: int
    modules: int

    status: str