Model: Fix gpu split params

`gpu_split_auto` is a bool, and `gpu_split` is an array of numbers giving
the GBs to allocate per GPU.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2023-11-14 23:20:14 -05:00
parent ea91d17a11
commit 126afdfdc2
3 changed files with 6 additions and 5 deletions

View File

@@ -15,7 +15,8 @@ class ModelList(BaseModel):
class ModelLoadRequest(BaseModel):
name: str
max_seq_len: Optional[int] = 4096
gpu_split: Optional[str] = "auto"
gpu_split_auto: Optional[bool] = True
gpu_split: Optional[List[float]] = Field(default_factory=list)
rope_scale: Optional[float] = 1.0
rope_alpha: Optional[float] = 1.0
no_flash_attention: Optional[bool] = False