Files
tabbyAPI/OAI/types/model.py
kingbri 083df7d585 Tree: Add generation logging support
Generations can be logged in the console along with sampling parameters
if the user enables it in config.

Metrics are always logged at the end of each prompt. In addition,
the model endpoint tells the user if they're being logged or not
for transparancy purposes.

Signed-off-by: kingbri <bdashore3@proton.me>
2023-12-12 23:43:35 -05:00

48 lines
1.4 KiB
Python

from pydantic import BaseModel, Field
from time import time
from typing import List, Optional
from gen_logging import LogConfig
class ModelCardParameters(BaseModel):
max_seq_len: Optional[int] = 4096
rope_scale: Optional[float] = 1.0
rope_alpha: Optional[float] = 1.0
prompt_template: Optional[str] = None
draft: Optional['ModelCard'] = None
class ModelCard(BaseModel):
id: str = "test"
object: str = "model"
created: int = Field(default_factory=lambda: int(time()))
owned_by: str = "tabbyAPI"
logging: Optional[LogConfig] = None
parameters: Optional[ModelCardParameters] = None
class ModelList(BaseModel):
object: str = "list"
data: List[ModelCard] = Field(default_factory=list)
class DraftModelLoadRequest(BaseModel):
draft_model_name: str
draft_rope_alpha: float = 1.0
draft_rope_scale: float = 1.0
# TODO: Unify this with ModelCardParams
class ModelLoadRequest(BaseModel):
name: str
max_seq_len: Optional[int] = 4096
gpu_split_auto: Optional[bool] = True
gpu_split: Optional[List[float]] = Field(default_factory=list)
rope_scale: Optional[float] = 1.0
rope_alpha: Optional[float] = 1.0
no_flash_attention: Optional[bool] = False
low_mem: Optional[bool] = False
prompt_template: Optional[str] = None
draft: Optional[DraftModelLoadRequest] = None
class ModelLoadResponse(BaseModel):
model_type: str = "model"
module: int
modules: int
status: str