mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-15 00:07:28 +00:00
Models can be loaded and unloaded via the API. Also add authentication to use the API and for administrator tasks. Both types of authorization use different keys. Also fix the unload function to properly free all used vram. Signed-off-by: kingbri <bdashore3@proton.me>
28 lines
744 B
Python
28 lines
744 B
Python
from pydantic import BaseModel, Field
|
|
from time import time
|
|
from typing import List, Optional
|
|
|
|
class ModelCard(BaseModel):
    """Metadata record describing a single model.

    Every field carries a default, so a card can be constructed with no
    arguments at all.
    """

    # Model identifier. The "test" default looks like a placeholder;
    # callers presumably override it - TODO confirm.
    id: str = "test"

    # Type tag for this record.
    object: str = "model"

    # Unix time in whole seconds. The factory runs once per instance, so
    # each card records its own construction time rather than import time.
    created: int = Field(default_factory=lambda: int(time()))

    # Owner label reported for the model.
    owned_by: str = "tabbyAPI"
|
|
|
|
class ModelList(BaseModel):
    """Container holding a collection of ModelCard entries."""

    # Type tag for this record.
    object: str = "list"

    # The cards themselves. default_factory gives every instance its own
    # fresh empty list.
    data: List[ModelCard] = Field(default_factory=list)
|
|
|
|
class ModelLoadRequest(BaseModel):
    """Parameters supplied when asking for a model to be loaded.

    Only ``name`` is required; every other field has a default. The
    tuning fields are typed Optional, so an explicit null is accepted in
    addition to simply omitting the field.
    """

    # Name of the model to load (required).
    name: str

    # Presumably the maximum sequence/context length - confirm against
    # the loader that consumes this request.
    max_seq_len: Optional[int] = 4096

    # GPU split specification as a string; "auto" is the default.
    # NOTE(review): the accepted non-"auto" format is not visible here.
    gpu_split: Optional[str] = "auto"

    # RoPE-related factors, both defaulting to 1.0. Exact semantics
    # depend on the loader - TODO confirm.
    rope_scale: Optional[float] = 1.0
    rope_alpha: Optional[float] = 1.0

    # Boolean switches, both off by default. Judging by the names they
    # disable flash attention and enable a low-memory mode respectively;
    # verify against the loader.
    no_flash_attention: Optional[bool] = False
    low_mem: Optional[bool] = False
|
|
|
|
class ModelLoadResponse(BaseModel):
    """Status report for a model load.

    All three fields are required; none has a default.
    """

    # Presumably the index of the module currently being processed and
    # the total number of modules - verify against the code that builds
    # this response.
    module: int
    modules: int

    # Free-form status string. The set of values used is not visible in
    # this file.
    status: str
|