mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-20 14:28:54 +00:00
Tree: Switch to async generators
Async generation helps remove many roadblocks to managing tasks using threads. It should allow for abortables and modern-day paradigms. NOTE: Exllamav2 itself is not an asynchronous library; it has just been integrated into tabby's async design to allow for a fast and concurrent API server. It is still being debated whether to run stream_ex in a separate thread or to manage it manually using asyncio.sleep(0). Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
"""Generator handling"""
|
||||
"""Concurrency handling"""
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
@@ -52,7 +52,7 @@ async def load_model_gen(model_path: pathlib.Path, **kwargs):
|
||||
progress.start()
|
||||
|
||||
try:
|
||||
for module, modules in load_status:
|
||||
async for module, modules in load_status:
|
||||
if module == 0:
|
||||
loading_task = progress.add_task(
|
||||
f"[cyan]Loading {model_type} modules", total=modules
|
||||
@@ -76,12 +76,12 @@ async def load_model(model_path: pathlib.Path, **kwargs):
|
||||
pass
|
||||
|
||||
|
||||
def load_loras(lora_dir, **kwargs):
|
||||
async def load_loras(lora_dir, **kwargs):
|
||||
"""Wrapper to load loras."""
|
||||
if len(container.active_loras) > 0:
|
||||
unload_loras()
|
||||
|
||||
return container.load_loras(lora_dir, **kwargs)
|
||||
return await container.load_loras(lora_dir, **kwargs)
|
||||
|
||||
|
||||
def unload_loras():
|
||||
|
||||
@@ -6,6 +6,8 @@ from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
|
||||
from common.concurrency import release_semaphore
|
||||
|
||||
|
||||
def load_progress(module, modules):
|
||||
"""Wrapper callback for load progress."""
|
||||
@@ -51,6 +53,13 @@ def handle_request_error(message: str, exc_info: bool = True):
|
||||
return request_error
|
||||
|
||||
|
||||
def handle_request_disconnect(message: str):
|
||||
"""Wrapper for handling for request disconnection."""
|
||||
|
||||
release_semaphore()
|
||||
logger.error(message)
|
||||
|
||||
|
||||
def unwrap(wrapped, default=None):
|
||||
"""Unwrap function for Optionals."""
|
||||
if wrapped is None:
|
||||
|
||||
Reference in New Issue
Block a user