From 751627e57136de93b9b9bb568bd3f167fd0f2459 Mon Sep 17 00:00:00 2001
From: kingbri
Date: Thu, 25 Jan 2024 01:01:29 -0500
Subject: [PATCH] OAI: Add fasttensors to model load endpoint

Also fix logging when loading prompt templates.

Signed-off-by: kingbri
---
 OAI/types/model.py          | 1 +
 backends/exllamav2/model.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/OAI/types/model.py b/OAI/types/model.py
index 9096d41..c0ddd5d 100644
--- a/OAI/types/model.py
+++ b/OAI/types/model.py
@@ -90,6 +90,7 @@ class ModelLoadRequest(BaseModel):
     prompt_template: Optional[str] = None
     num_experts_per_token: Optional[int] = None
     use_cfg: Optional[bool] = None
+    fasttensors: Optional[bool] = False
     draft: Optional[DraftModelLoadRequest] = None
 
 
diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 52764e2..ae1d7e4 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -243,7 +243,7 @@ class ExllamaV2Container:
     def find_prompt_template(self, prompt_template_name, model_directory):
         """Tries to find a prompt template using various methods"""
 
-        logger.info("Loading prompt template with name " f"{prompt_template_name}")
+        logger.info("Attempting to load a prompt template if present.")
 
         find_template_functions = [
             lambda: get_template_from_model_json(
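
A minimal client-side sketch of exercising the new `fasttensors` field on the model load endpoint. The endpoint path, port, model name, and admin-key header below are assumptions for illustration and are not taken from this patch:

```python
# Hypothetical request against a locally running server.
# Assumptions: endpoint path "/v1/model/load", port 5000, and an
# "x-admin-key" auth header; adjust to the actual deployment.
import requests

payload = {
    "name": "MyModel-exl2",   # placeholder model folder name
    "fasttensors": True,      # new optional flag introduced by this patch
}

response = requests.post(
    "http://127.0.0.1:5000/v1/model/load",
    json=payload,
    headers={"x-admin-key": "YOUR_ADMIN_KEY"},
)
print(response.status_code, response.text)
```

Since the field defaults to `False` in `ModelLoadRequest`, omitting it keeps the previous loading behavior.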