Model: Use true async jobs and add logprobs

The new async dynamic job allows for native async support without the
need for threading. Also add logprobs and metrics back to responses.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2024-05-23 21:37:50 -04:00
committed by Brian Dashore
parent 32ae62feac
commit 06ff47e2b4
4 changed files with 102 additions and 217 deletions

View File

@@ -2,7 +2,6 @@
import pathlib
from asyncio import CancelledError
import threading
from fastapi import HTTPException
from typing import Optional
@@ -65,10 +64,8 @@ async def stream_generate_completion(data: CompletionRequest, model_path: pathli
"""Streaming generation for completions."""
try:
abort_event = threading.Event()
new_generation = model.container.generate_gen(
data.prompt, abort_event, **data.to_gen_params()
data.prompt, **data.to_gen_params()
)
async for generation in new_generation:
response = _create_response(generation, model_path.name)
@@ -81,7 +78,6 @@ async def stream_generate_completion(data: CompletionRequest, model_path: pathli
except CancelledError:
# Get out if the request gets disconnected
abort_event.set()
handle_request_disconnect("Completion generation cancelled by user.")
except Exception:
yield get_generator_error(