mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-20 14:28:54 +00:00
Model: Fix chunk size handling
Fixes the wrong class attribute name used for max_attention_size and corrects the declaration of the draft model's chunk_size. Also exposes the parameter to the end user in both the config and the model load request. Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -16,6 +16,7 @@ class ModelCardParameters(BaseModel):
|
||||
rope_scale: Optional[float] = 1.0
|
||||
rope_alpha: Optional[float] = 1.0
|
||||
cache_mode: Optional[str] = "FP16"
|
||||
chunk_size: Optional[int] = 2048
|
||||
prompt_template: Optional[str] = None
|
||||
num_experts_per_token: Optional[int] = None
|
||||
use_cfg: Optional[bool] = None
|
||||
@@ -90,6 +91,7 @@ class ModelLoadRequest(BaseModel):
|
||||
no_flash_attention: Optional[bool] = False
|
||||
# low_mem: Optional[bool] = False
|
||||
cache_mode: Optional[str] = "FP16"
|
||||
chunk_size: Optional[int] = 2048
|
||||
prompt_template: Optional[str] = None
|
||||
num_experts_per_token: Optional[int] = None
|
||||
use_cfg: Optional[bool] = None
|
||||
|
||||
Reference in New Issue
Block a user