mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-28 18:21:42 +00:00
OAI: Add cache_mode parameter to model
Mistakenly forgot that the user can choose which cache mode to use when loading a model. Also add cache_mode to the parameters returned when fetching model info. Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -8,6 +8,7 @@ class ModelCardParameters(BaseModel):
|
|||||||
rope_scale: Optional[float] = 1.0
|
rope_scale: Optional[float] = 1.0
|
||||||
rope_alpha: Optional[float] = 1.0
|
rope_alpha: Optional[float] = 1.0
|
||||||
prompt_template: Optional[str] = None
|
prompt_template: Optional[str] = None
|
||||||
|
cache_mode: Optional[str] = "FP16"
|
||||||
draft: Optional['ModelCard'] = None
|
draft: Optional['ModelCard'] = None
|
||||||
|
|
||||||
class ModelCard(BaseModel):
|
class ModelCard(BaseModel):
|
||||||
@@ -37,6 +38,7 @@ class ModelLoadRequest(BaseModel):
|
|||||||
rope_alpha: Optional[float] = 1.0
|
rope_alpha: Optional[float] = 1.0
|
||||||
no_flash_attention: Optional[bool] = False
|
no_flash_attention: Optional[bool] = False
|
||||||
# low_mem: Optional[bool] = False
|
# low_mem: Optional[bool] = False
|
||||||
|
cache_mode: Optional[str] = "FP16"
|
||||||
prompt_template: Optional[str] = None
|
prompt_template: Optional[str] = None
|
||||||
draft: Optional[DraftModelLoadRequest] = None
|
draft: Optional[DraftModelLoadRequest] = None
|
||||||
|
|
||||||
|
|||||||
1
main.py
1
main.py
@@ -82,6 +82,7 @@ async def get_current_model():
|
|||||||
rope_scale = model_container.config.scale_pos_emb,
|
rope_scale = model_container.config.scale_pos_emb,
|
||||||
rope_alpha = model_container.config.scale_alpha_value,
|
rope_alpha = model_container.config.scale_alpha_value,
|
||||||
max_seq_len = model_container.config.max_seq_len,
|
max_seq_len = model_container.config.max_seq_len,
|
||||||
|
cache_mode = "FP8" if model_container.cache_fp8 else "FP16",
|
||||||
prompt_template = unwrap(model_container.prompt_template, "auto")
|
prompt_template = unwrap(model_container.prompt_template, "auto")
|
||||||
),
|
),
|
||||||
logging = gen_logging.config
|
logging = gen_logging.config
|
||||||
|
|||||||
Reference in New Issue
Block a user