mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-15 00:07:28 +00:00
OAI: Add cancellation with inline load
When the request is cancelled, cancel the load task. In addition, when checking if a model container exists, also check if the model is fully loaded.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -55,7 +55,14 @@ async def completion_request(
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
if data.model:
|
if data.model:
|
||||||
await load_inline_model(data.model, request)
|
inline_load_task = asyncio.create_task(load_inline_model(data.model, request))
|
||||||
|
|
||||||
|
await run_with_request_disconnect(
|
||||||
|
request,
|
||||||
|
inline_load_task,
|
||||||
|
disconnect_message=f"Model switch for generation {request.state.id} "
|
||||||
|
+ "cancelled by user.",
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
await check_model_container()
|
await check_model_container()
|
||||||
|
|
||||||
|
|||||||
@@ -112,8 +112,12 @@ async def _stream_collector(
|
|||||||
async def load_inline_model(model_name: str, request: Request):
|
async def load_inline_model(model_name: str, request: Request):
|
||||||
"""Load a model from the data.model parameter"""
|
"""Load a model from the data.model parameter"""
|
||||||
|
|
||||||
# Return if the model container already exists
|
# Return if the model container already exists and the model is fully loaded
|
||||||
if model.container and model.container.model_dir.name == model_name:
|
if (
|
||||||
|
model.container
|
||||||
|
and model.container.model_dir.name == model_name
|
||||||
|
and model.container.model_loaded
|
||||||
|
):
|
||||||
return
|
return
|
||||||
|
|
||||||
# Inline model loading isn't enabled or the user isn't an admin
|
# Inline model loading isn't enabled or the user isn't an admin
|
||||||
|
|||||||
Reference in New Issue
Block a user