API: Add draft model support

A draft model can be loaded alongside the main model by including a child
object called "draft" in the POST request. Again, draft models need to be
located within the draft model directory to get loaded.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2023-11-19 00:32:25 -05:00
parent 6b9af58cc1
commit f47919b1d3
2 changed files with 37 additions and 16 deletions

View File

@@ -12,6 +12,10 @@ class ModelList(BaseModel):
object: str = "list"
data: List[ModelCard] = Field(default_factory=list)
# Request payload describing a draft model to load. ModelLoadRequest exposes
# it through its optional `draft` field, so it is only present when the caller
# supplies that child object.
class DraftModelLoadRequest(BaseModel):
    # Required field (no default).
    # NOTE(review): presumably resolved relative to the draft model dir —
    # confirm against the loader.
    draft_model_name: str

    # Defaults to 1.0 when omitted.
    # NOTE(review): presumably the RoPE alpha value applied to the draft
    # model — confirm against the loader.
    draft_rope_alpha: float = 1.0
class ModelLoadRequest(BaseModel):
name: str
max_seq_len: Optional[int] = 4096
@@ -21,8 +25,10 @@ class ModelLoadRequest(BaseModel):
rope_alpha: Optional[float] = 1.0
no_flash_attention: Optional[bool] = False
low_mem: Optional[bool] = False
draft: Optional[DraftModelLoadRequest] = None
# Response schema for a model load.
# NOTE(review): the module/modules pair looks like a progress counter
# (current vs. total) — confirm against the endpoint that emits it.
class ModelLoadResponse(BaseModel):
    # Defaults to "model".
    # NOTE(review): presumably distinguishes the main model from a draft
    # model — confirm which other values are emitted.
    model_type: str = "model"
    module: int  # required, no default
    modules: int  # required, no default
    status: str  # required, no default