Model: Add abort on generation

When the model is processing a prompt, add the ability to abort generation when the request is cancelled. This also catches a SIGINT.

Signed-off-by: kingbri <bdashore3@proton.me>
2026-03-15 00:07:28 +00:00 · 2024-03-20 11:01:39 -04:00
parent 7020a0a2d1
commit 07d9b7cf7b
3 changed files with 21 additions and 5 deletions
--- a/endpoints/OAI/utils/completion.py
+++ b/endpoints/OAI/utils/completion.py
@@ -2,6 +2,7 @@

 import pathlib
 from asyncio import CancelledError
+import threading
 from fastapi import HTTPException
 from typing import Optional

@@ -64,8 +65,10 @@ async def stream_generate_completion(data: CompletionRequest, model_path: pathli
    """Streaming generation for completions."""

    try:
+        abort_event = threading.Event()
+
        new_generation = model.container.generate_gen(
-            data.prompt, **data.to_gen_params()
+            data.prompt, abort_event, **data.to_gen_params()
        )
        async for generation in new_generation:
            response = _create_response(generation, model_path.name)
@@ -78,6 +81,7 @@ async def stream_generate_completion(data: CompletionRequest, model_path: pathli
    except CancelledError:
        # Get out if the request gets disconnected

+        abort_event.set()
        handle_request_disconnect("Completion generation cancelled by user.")
    except Exception:
        yield get_generator_error(