"""Pydantic models for the model card and model load/unload endpoints.

Sometimes fastchat may not be able to detect the prompt template from the
model path, so the template can also be set in config.yml or via the request
object itself. The provided prompt template is also sent back on a model
info request.
"""

from time import time
from typing import List, Optional

from pydantic import BaseModel, Field


class ModelCardParameters(BaseModel):
    """Per-model parameters reported inside a model card."""

    max_seq_len: Optional[int] = 4096
    rope_scale: Optional[float] = 1.0
    rope_alpha: Optional[float] = 1.0
    prompt_template: Optional[str] = None

    # Forward reference: ModelCard is defined below.
    draft: Optional["ModelCard"] = None


class ModelCard(BaseModel):
    """A single model entry, mirroring the OpenAI model object."""

    id: str = "test"
    object: str = "model"
    created: int = Field(default_factory=lambda: int(time()))
    owned_by: str = "tabbyAPI"
    parameters: Optional[ModelCardParameters] = None
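

# Note (editor's sketch, not in the original file): on pydantic v1 the string
# forward reference in ModelCardParameters.draft must be resolved explicitly
# once ModelCard exists; pydantic v2 rebuilds it lazily on first use, so the
# call below is only needed on v1.
# ModelCardParameters.update_forward_refs()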


class ModelList(BaseModel):
    """Response body for the model list endpoint."""

    object: str = "list"
    data: List[ModelCard] = Field(default_factory=list)
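

# Example (hypothetical usage, not part of the original file): building a
# /v1/models-style payload from a list of model names.
#
#   cards = [ModelCard(id=name) for name in ("llama2-13b", "mistral-7b")]
#   payload = ModelList(data=cards).dict()  # .model_dump() on pydantic v2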


class DraftModelLoadRequest(BaseModel):
    """Parameters for loading a draft model alongside the main model."""

    draft_model_name: str
    draft_rope_alpha: float = 1.0
    draft_rope_scale: float = 1.0


# TODO: Unify this with ModelCardParameters
class ModelLoadRequest(BaseModel):
    """Request body for loading a model."""

    name: str
    max_seq_len: Optional[int] = 4096
    gpu_split_auto: Optional[bool] = True
    gpu_split: Optional[List[float]] = Field(default_factory=list)
    rope_scale: Optional[float] = 1.0
    rope_alpha: Optional[float] = 1.0
    no_flash_attention: Optional[bool] = False
    low_mem: Optional[bool] = False
    prompt_template: Optional[str] = None
    draft: Optional[DraftModelLoadRequest] = None
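

# Example (hypothetical values, not part of the original file): a load
# request that pins the prompt template explicitly, for the case where
# fastchat cannot detect it from the model path.
#
#   request = ModelLoadRequest(
#       name="MythoMax-L2-13B-GPTQ",
#       max_seq_len=8192,
#       rope_alpha=2.5,
#       prompt_template="alpaca",
#       draft=DraftModelLoadRequest(draft_model_name="TinyLlama-1.1B"),
#   )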


class ModelLoadResponse(BaseModel):
    """Progress entry emitted while a model is loading (module of modules)."""

    model_type: str = "model"
    module: int
    modules: int
    status: str
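

# Example (editor's sketch, not part of the original file; the status strings
# are illustrative): emitting progress as the loader finishes each module.
#
#   for module, modules in ((1, 40), (40, 40)):
#       status = "finished" if module == modules else "processing"
#       chunk = ModelLoadResponse(module=module, modules=modules, status=status)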