OAI: Add API-based model loading/unloading and auth routes

Models can now be loaded and unloaded via the API. Also add
authentication for regular API use and for administrator tasks.

The two types of authorization use different keys.

Also fix the unload function to properly free all used VRAM.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2023-11-14 01:17:19 -05:00
parent 47343e2f1a
commit b625bface9
11 changed files with 195 additions and 55 deletions

View File

@@ -1,13 +0,0 @@
from pydantic import BaseModel, Field
from time import time
from typing import List
class ModelCard(BaseModel):
    """Describe a single model entry.

    Only ``id`` has to be supplied by the caller; every other field
    carries a default.
    """

    # Identifier of the model (required, no default).
    id: str

    # Constant type tag for this record.
    object: str = "model"

    # Whole seconds since the epoch, sampled when the instance is built
    # (a factory is used so the value is not frozen at class definition).
    created: int = Field(
        default_factory=lambda: int(time()),
    )

    # Owner label; defaults to "tabbyAPI".
    owned_by: str = "tabbyAPI"
class ModelList(BaseModel):
    """Container holding a list of ``ModelCard`` entries."""

    # Constant type tag for this record.
    object: str = "list"

    # The cards themselves; the factory gives every instance its own
    # fresh empty list instead of a shared one.
    data: List[ModelCard] = Field(
        default_factory=list,
    )

View File

@@ -2,7 +2,7 @@ from uuid import uuid4
from time import time
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Union
from OAI.models.common import LogProbs, UsageStats
from OAI.types.common import LogProbs, UsageStats
class CompletionRespChoice(BaseModel):
finish_reason: str

27
OAI/types/models.py Normal file
View File

@@ -0,0 +1,27 @@
from pydantic import BaseModel, Field
from time import time
from typing import List, Optional
class ModelCard(BaseModel):
    """Describe a single model entry.

    Every field has a default, so the card can be built with no
    arguments.
    """

    # Identifier of the model.
    # NOTE(review): the "test" default looks like a placeholder - confirm
    # callers always pass a real id.
    id: str = "test"

    # Constant type tag for this record.
    object: str = "model"

    # Whole seconds since the epoch, sampled when the instance is built
    # (a factory is used so the value is not frozen at class definition).
    created: int = Field(
        default_factory=lambda: int(time()),
    )

    # Owner label; defaults to "tabbyAPI".
    owned_by: str = "tabbyAPI"
class ModelList(BaseModel):
    """Container holding a list of ``ModelCard`` entries."""

    # Constant type tag for this record.
    object: str = "list"

    # The cards themselves; the factory gives every instance its own
    # fresh empty list instead of a shared one.
    data: List[ModelCard] = Field(
        default_factory=list,
    )
class ModelLoadRequest(BaseModel):
    """Parameters accepted when asking for a model to be loaded.

    Only ``name`` is required; the remaining options fall back to the
    defaults listed below.
    """

    # Name of the model to load (required).
    name: str

    # Presumably the maximum sequence length - confirm against the loader.
    max_seq_len: Optional[int] = 4096

    # Passed as a string; "auto" is the default value.
    # NOTE(review): the accepted non-"auto" format is not visible here.
    gpu_split: Optional[str] = "auto"

    # RoPE-related factors (by name); both default to 1.0.
    rope_scale: Optional[float] = 1.0
    rope_alpha: Optional[float] = 1.0

    # Boolean switches, both off by default.
    no_flash_attention: Optional[bool] = False
    low_mem: Optional[bool] = False
class ModelLoadResponse(BaseModel):
    """Payload returned in reply to a model load request.

    All three fields are required.
    """

    # Presumably the index of the module currently being loaded - TODO
    # confirm against the route that builds this response.
    module: int

    # Presumably the total number of modules to load - TODO confirm.
    modules: int

    # Free-form status string; the allowed values are not visible here.
    status: str

View File

@@ -1,7 +1,7 @@
import pathlib
from OAI.models.completions import CompletionResponse, CompletionRespChoice
from OAI.models.common import UsageStats
from OAI.models.models import ModelList, ModelCard
from OAI.types.completions import CompletionResponse, CompletionRespChoice
from OAI.types.common import UsageStats
from OAI.types.models import ModelList, ModelCard
from typing import Optional
def create_completion_response(text: str, index: int, model_name: Optional[str]):