Tree: Switch to asynchronous file handling

Using aiofiles, there's no longer a possiblity of blocking file operations that can hang up the event loop. In addition, partially migrate classes to use asynchronous init instead of the normal python magic method. The only exception is config, since that's handled in the synchonous init before the event loop starts. Signed-off-by: kingbri <bdashore3@proton.me>
2026-03-15 00:07:28 +00:00 · 2024-09-10 16:45:14 -04:00
parent 54bfb770af
commit 2c3bc71afa
9 changed files with 63 additions and 36 deletions
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -1,5 +1,6 @@
 """The model container class for ExLlamaV2 models."""

+import aiofiles
 import asyncio
 import gc
 import math
@@ -106,13 +107,17 @@ class ExllamaV2Container:
    load_lock: asyncio.Lock = asyncio.Lock()
    load_condition: asyncio.Condition = asyncio.Condition()

-    def __init__(self, model_directory: pathlib.Path, quiet=False, **kwargs):
+    @classmethod
+    async def create(cls, model_directory: pathlib.Path, quiet=False, **kwargs):
        """
-        Primary initializer for model container.
+        Primary asynchronous initializer for model container.

        Kwargs are located in config_sample.yml
        """

+        # Create a new instance as a "fake self"
+        self = cls()
+
        self.quiet = quiet

        # Initialize config
@@ -155,13 +160,13 @@ class ExllamaV2Container:
            self.draft_config.prepare()

        # Create the hf_config
-        self.hf_config = HuggingFaceConfig.from_file(model_directory)
+        self.hf_config = await HuggingFaceConfig.from_file(model_directory)

        # Load generation config overrides
        generation_config_path = model_directory / "generation_config.json"
        if generation_config_path.exists():
            try:
-                self.generation_config = GenerationConfig.from_file(
+                self.generation_config = await GenerationConfig.from_file(
                    generation_config_path.parent
                )
            except Exception:
@@ -171,7 +176,7 @@ class ExllamaV2Container:
                )

        # Apply a model's config overrides while respecting user settings
-        kwargs = self.set_model_overrides(**kwargs)
+        kwargs = await self.set_model_overrides(**kwargs)

        # MARK: User configuration

@@ -320,7 +325,7 @@ class ExllamaV2Container:
            self.cache_size = self.config.max_seq_len

        # Try to set prompt template
-        self.prompt_template = self.find_prompt_template(
+        self.prompt_template = await self.find_prompt_template(
            kwargs.get("prompt_template"), model_directory
        )

@@ -373,7 +378,10 @@ class ExllamaV2Container:
                self.draft_config.max_input_len = chunk_size
                self.draft_config.max_attention_size = chunk_size**2

-    def set_model_overrides(self, **kwargs):
+        # Return the created instance
+        return self
+
+    async def set_model_overrides(self, **kwargs):
        """Sets overrides from a model folder's config yaml."""

        override_config_path = self.model_dir / "tabby_config.yml"
@@ -381,8 +389,11 @@ class ExllamaV2Container:
        if not override_config_path.exists():
            return kwargs

-        with open(override_config_path, "r", encoding="utf8") as override_config_file:
-            override_args = unwrap(yaml.safe_load(override_config_file), {})
+        async with aiofiles.open(
+            override_config_path, "r", encoding="utf8"
+        ) as override_config_file:
+            contents = await override_config_file.read()
+            override_args = unwrap(yaml.safe_load(contents), {})

            # Merge draft overrides beforehand
            draft_override_args = unwrap(override_args.get("draft"), {})
@@ -393,7 +404,7 @@ class ExllamaV2Container:
            merged_kwargs = {**override_args, **kwargs}
            return merged_kwargs

-    def find_prompt_template(self, prompt_template_name, model_directory):
+    async def find_prompt_template(self, prompt_template_name, model_directory):
        """Tries to find a prompt template using various methods."""

        logger.info("Attempting to load a prompt template if present.")
@@ -431,7 +442,7 @@ class ExllamaV2Container:
        # Continue on exception since functions are tried as they fail
        for template_func in find_template_functions:
            try:
-                prompt_template = template_func()
+                prompt_template = await template_func()
                if prompt_template is not None:
                    return prompt_template
            except TemplateLoadError as e: