Tree: Switch to asynchronous file handling

Using aiofiles, there's no longer a possibility of blocking file operations
that can hang up the event loop. In addition, partially migrate
classes to use asynchronous init instead of the normal Python magic method.

The only exception is config, since that's handled in the synchronous
init before the event loop starts.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2024-09-10 16:45:14 -04:00
parent 54bfb770af
commit 2c3bc71afa
9 changed files with 63 additions and 36 deletions

View File

@@ -1,5 +1,6 @@
"""The model container class for ExLlamaV2 models."""
import aiofiles
import asyncio
import gc
import math
@@ -106,13 +107,17 @@ class ExllamaV2Container:
load_lock: asyncio.Lock = asyncio.Lock()
load_condition: asyncio.Condition = asyncio.Condition()
def __init__(self, model_directory: pathlib.Path, quiet=False, **kwargs):
@classmethod
async def create(cls, model_directory: pathlib.Path, quiet=False, **kwargs):
"""
Primary initializer for model container.
Primary asynchronous initializer for model container.
Kwargs are located in config_sample.yml
"""
# Create a new instance as a "fake self"
self = cls()
self.quiet = quiet
# Initialize config
@@ -155,13 +160,13 @@ class ExllamaV2Container:
self.draft_config.prepare()
# Create the hf_config
self.hf_config = HuggingFaceConfig.from_file(model_directory)
self.hf_config = await HuggingFaceConfig.from_file(model_directory)
# Load generation config overrides
generation_config_path = model_directory / "generation_config.json"
if generation_config_path.exists():
try:
self.generation_config = GenerationConfig.from_file(
self.generation_config = await GenerationConfig.from_file(
generation_config_path.parent
)
except Exception:
@@ -171,7 +176,7 @@ class ExllamaV2Container:
)
# Apply a model's config overrides while respecting user settings
kwargs = self.set_model_overrides(**kwargs)
kwargs = await self.set_model_overrides(**kwargs)
# MARK: User configuration
@@ -320,7 +325,7 @@ class ExllamaV2Container:
self.cache_size = self.config.max_seq_len
# Try to set prompt template
self.prompt_template = self.find_prompt_template(
self.prompt_template = await self.find_prompt_template(
kwargs.get("prompt_template"), model_directory
)
@@ -373,7 +378,10 @@ class ExllamaV2Container:
self.draft_config.max_input_len = chunk_size
self.draft_config.max_attention_size = chunk_size**2
def set_model_overrides(self, **kwargs):
# Return the created instance
return self
async def set_model_overrides(self, **kwargs):
"""Sets overrides from a model folder's config yaml."""
override_config_path = self.model_dir / "tabby_config.yml"
@@ -381,8 +389,11 @@ class ExllamaV2Container:
if not override_config_path.exists():
return kwargs
with open(override_config_path, "r", encoding="utf8") as override_config_file:
override_args = unwrap(yaml.safe_load(override_config_file), {})
async with aiofiles.open(
override_config_path, "r", encoding="utf8"
) as override_config_file:
contents = await override_config_file.read()
override_args = unwrap(yaml.safe_load(contents), {})
# Merge draft overrides beforehand
draft_override_args = unwrap(override_args.get("draft"), {})
@@ -393,7 +404,7 @@ class ExllamaV2Container:
merged_kwargs = {**override_args, **kwargs}
return merged_kwargs
def find_prompt_template(self, prompt_template_name, model_directory):
async def find_prompt_template(self, prompt_template_name, model_directory):
"""Tries to find a prompt template using various methods."""
logger.info("Attempting to load a prompt template if present.")
@@ -431,7 +442,7 @@ class ExllamaV2Container:
# Continue on exception since functions are tried as they fail
for template_func in find_template_functions:
try:
prompt_template = template_func()
prompt_template = await template_func()
if prompt_template is not None:
return prompt_template
except TemplateLoadError as e: