mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-15 00:07:28 +00:00
Model: Use true async jobs and add logprobs
The new async dynamic job allows for native async support without the need for threading. Also add logprobs and metrics back to responses.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -1,8 +1,7 @@
|
||||
"""Chat completion utilities for OAI server."""
|
||||
|
||||
from asyncio import CancelledError
|
||||
import pathlib
|
||||
import threading
|
||||
from asyncio import CancelledError
|
||||
from typing import Optional
|
||||
from uuid import uuid4
|
||||
|
||||
@@ -198,11 +197,8 @@ async def stream_generate_chat_completion(
|
||||
"""Generator for the generation process."""
|
||||
try:
|
||||
const_id = f"chatcmpl-{uuid4().hex}"
|
||||
abort_event = threading.Event()
|
||||
|
||||
new_generation = model.container.generate_gen(
|
||||
prompt, abort_event, **data.to_gen_params()
|
||||
)
|
||||
new_generation = model.container.generate_gen(prompt, **data.to_gen_params())
|
||||
async for generation in new_generation:
|
||||
response = _create_stream_chunk(const_id, generation, model_path.name)
|
||||
|
||||
@@ -214,7 +210,6 @@ async def stream_generate_chat_completion(
|
||||
except CancelledError:
|
||||
# Get out if the request gets disconnected
|
||||
|
||||
abort_event.set()
|
||||
handle_request_disconnect("Chat completion generation cancelled by user.")
|
||||
except Exception:
|
||||
yield get_generator_error(
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
import pathlib
|
||||
from asyncio import CancelledError
|
||||
import threading
|
||||
from fastapi import HTTPException
|
||||
from typing import Optional
|
||||
|
||||
@@ -65,10 +64,8 @@ async def stream_generate_completion(data: CompletionRequest, model_path: pathli
|
||||
"""Streaming generation for completions."""
|
||||
|
||||
try:
|
||||
abort_event = threading.Event()
|
||||
|
||||
new_generation = model.container.generate_gen(
|
||||
data.prompt, abort_event, **data.to_gen_params()
|
||||
data.prompt, **data.to_gen_params()
|
||||
)
|
||||
async for generation in new_generation:
|
||||
response = _create_response(generation, model_path.name)
|
||||
@@ -81,7 +78,6 @@ async def stream_generate_completion(data: CompletionRequest, model_path: pathli
|
||||
except CancelledError:
|
||||
# Get out if the request gets disconnected
|
||||
|
||||
abort_event.set()
|
||||
handle_request_disconnect("Completion generation cancelled by user.")
|
||||
except Exception:
|
||||
yield get_generator_error(
|
||||
|
||||
Reference in New Issue
Block a user