mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-14 15:57:27 +00:00
Move common functions into their own folder and refactor the backends to use their own folder as well. Also clean up imports and alphabetize the import statements themselves. Finally, move colab and docker into their own folders as well. Signed-off-by: kingbri <bdashore3@proton.me>
106 lines
3.0 KiB
Python
106 lines
3.0 KiB
Python
""" Contains model card types. """
|
|
from pydantic import BaseModel, Field, ConfigDict
|
|
from time import time
|
|
from typing import List, Optional
|
|
|
|
from common.gen_logging import LogPreferences
|
|
|
|
|
|
class ModelCardParameters(BaseModel):
    """Parameters reported on a model card.

    Every field is optional and carries a default, so an instance can be
    created without supplying any values.
    """

    # Original note: safe to do this since it's guaranteed to fetch a
    # max seq len from model_container.
    max_seq_len: Optional[int] = None
    rope_scale: Optional[float] = 1.0
    rope_alpha: Optional[float] = 1.0
    cache_mode: Optional[str] = "FP16"
    prompt_template: Optional[str] = None
    num_experts_per_token: Optional[int] = None
    use_cfg: Optional[bool] = None

    # String forward reference: ModelCard is declared further down this file.
    draft: Optional["ModelCard"] = None
|
|
|
|
|
|
class ModelCard(BaseModel):
    """A single model card entry.

    All fields have defaults, so a card can be constructed with no arguments.
    """

    id: str = "test"
    object: str = "model"

    # Computed per instance: the value of time() at creation, truncated to int.
    created: int = Field(default_factory=lambda: int(time()))
    owned_by: str = "tabbyAPI"

    # Both of these are omitted (None) unless explicitly provided.
    logging: Optional[LogPreferences] = None
    parameters: Optional[ModelCardParameters] = None
|
|
|
|
|
|
class ModelList(BaseModel):
    """A collection of model cards."""

    object: str = "list"

    # default_factory builds a new empty list for each instance.
    data: List[ModelCard] = Field(default_factory=list)
|
|
|
|
|
|
class DraftModelLoadRequest(BaseModel):
    """Request body describing a draft model to load."""

    # The only field without a default, so it must be supplied.
    draft_model_name: str
    draft_rope_scale: Optional[float] = 1.0
    draft_rope_alpha: Optional[float] = Field(
        default=None,
        description="Automatically calculated if not present",
        examples=[1.0],
    )
|
|
|
|
|
|
# TODO: Unify this with ModelCardParameters
class ModelLoadRequest(BaseModel):
    """Request body for loading a model.

    Only ``name`` is required; every other field has a default.
    """

    name: str

    # Max seq len is fetched from config.json of the model by default
    max_seq_len: Optional[int] = Field(
        default=None,
        description="Leave this blank to use the model's base sequence length",
        examples=[4096],
    )
    override_base_seq_len: Optional[int] = Field(
        default=None,
        description=(
            "Overrides the model's base sequence length. "
            "Leave blank if unsure"
        ),
        examples=[4096],
    )
    gpu_split_auto: Optional[bool] = True

    # default_factory builds a new empty list for each request.
    gpu_split: Optional[List[float]] = Field(
        default_factory=list,
        examples=[[24.0, 20.0]],
    )
    rope_scale: Optional[float] = Field(
        default=None,
        description="Automatically pulled from the model's config if not present",
        examples=[1.0],
    )
    rope_alpha: Optional[float] = Field(
        default=None,
        description="Automatically calculated if not present",
        examples=[1.0],
    )
    no_flash_attention: Optional[bool] = False
    # low_mem: Optional[bool] = False
    cache_mode: Optional[str] = "FP16"
    prompt_template: Optional[str] = None
    num_experts_per_token: Optional[int] = None
    use_cfg: Optional[bool] = None

    # Nested draft model settings; None when not supplied.
    draft: Optional[DraftModelLoadRequest] = None
|
|
|
|
|
|
class ModelLoadResponse(BaseModel):
    """Represents a model load response.

    ``module``, ``modules`` and ``status`` are required; ``model_type``
    defaults to ``"model"``.
    """

    # Avoids pydantic's protected-namespace warning, which the ``model_type``
    # field would otherwise trigger. pydantic documents this setting as a
    # tuple of strings, so an empty tuple is used rather than a list.
    model_config = ConfigDict(protected_namespaces=())

    model_type: str = "model"

    # NOTE(review): presumably the current module index and the total module
    # count of a load in progress -- confirm against the code that builds
    # this response.
    module: int
    modules: int
    status: str
|