mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-15 00:07:28 +00:00
Model + API: GPU split updates and fixes
For the tensor-parallel (TP) loader, GPU split cannot be an empty array. However, defaulting the parameter to an empty array makes it easier to calculate the device list. Therefore, an empty array is cast to None with a falsy check at load time. Also add draft_gpu_split to the draft model load request.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
@@ -63,7 +63,10 @@ class DraftModelLoadRequest(BaseModel):
|
||||
default=None,
|
||||
examples=[1.0],
|
||||
)
|
||||
draft_cache_mode: Optional[str] = None
|
||||
draft_gpu_split: Optional[List[float]] = Field(
|
||||
default_factory=list,
|
||||
examples=[[24.0, 20.0]],
|
||||
)
|
||||
|
||||
|
||||
class ModelLoadRequest(BaseModel):
|
||||
@@ -94,7 +97,7 @@ class ModelLoadRequest(BaseModel):
|
||||
gpu_split_auto: Optional[bool] = None
|
||||
autosplit_reserve: Optional[List[float]] = None
|
||||
gpu_split: Optional[List[float]] = Field(
|
||||
- default=None,
|
||||
+ default_factory=list,
|
||||
examples=[[24.0, 20.0]],
|
||||
)
|
||||
rope_scale: Optional[float] = Field(
|
||||
|
||||
Reference in New Issue
Block a user