mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-15 00:07:28 +00:00
OAI: Add API-based model loading/unloading and auth routes
Models can now be loaded and unloaded via the API. Authentication has also been added, both for general API use and for administrator tasks; each type of authorization uses a different key. Additionally, the unload function is fixed to properly free all used VRAM. Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -1,13 +0,0 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from time import time
|
||||
from typing import List
|
||||
|
||||
class ModelCard(BaseModel):
|
||||
id: str
|
||||
object: str = "model"
|
||||
created: int = Field(default_factory=lambda: int(time()))
|
||||
owned_by: str = "tabbyAPI"
|
||||
|
||||
class ModelList(BaseModel):
|
||||
object: str = "list"
|
||||
data: List[ModelCard] = Field(default_factory=list)
|
||||
@@ -2,7 +2,7 @@ from uuid import uuid4
|
||||
from time import time
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional, Dict, Union
|
||||
from OAI.models.common import LogProbs, UsageStats
|
||||
from OAI.types.common import LogProbs, UsageStats
|
||||
|
||||
class CompletionRespChoice(BaseModel):
|
||||
finish_reason: str
|
||||
27
OAI/types/models.py
Normal file
27
OAI/types/models.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from time import time
|
||||
from typing import List, Optional
|
||||
|
||||
class ModelCard(BaseModel):
|
||||
id: str = "test"
|
||||
object: str = "model"
|
||||
created: int = Field(default_factory=lambda: int(time()))
|
||||
owned_by: str = "tabbyAPI"
|
||||
|
||||
class ModelList(BaseModel):
|
||||
object: str = "list"
|
||||
data: List[ModelCard] = Field(default_factory=list)
|
||||
|
||||
class ModelLoadRequest(BaseModel):
|
||||
name: str
|
||||
max_seq_len: Optional[int] = 4096
|
||||
gpu_split: Optional[str] = "auto"
|
||||
rope_scale: Optional[float] = 1.0
|
||||
rope_alpha: Optional[float] = 1.0
|
||||
no_flash_attention: Optional[bool] = False
|
||||
low_mem: Optional[bool] = False
|
||||
|
||||
class ModelLoadResponse(BaseModel):
|
||||
module: int
|
||||
modules: int
|
||||
status: str
|
||||
@@ -1,7 +1,7 @@
|
||||
import pathlib
|
||||
from OAI.models.completions import CompletionResponse, CompletionRespChoice
|
||||
from OAI.models.common import UsageStats
|
||||
from OAI.models.models import ModelList, ModelCard
|
||||
from OAI.types.completions import CompletionResponse, CompletionRespChoice
|
||||
from OAI.types.common import UsageStats
|
||||
from OAI.types.models import ModelList, ModelCard
|
||||
from typing import Optional
|
||||
|
||||
def create_completion_response(text: str, index: int, model_name: Optional[str]):
|
||||
|
||||
Reference in New Issue
Block a user