mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-19 22:08:59 +00:00
Middleware runs on both the request and response. Therefore, streaming responses had increased latency when processing tasks and sending data to the client which resulted in erratic streaming behavior. Use a depends to add request IDs since it only executes when the request is run rather than expecting the response to be sent as well. For the future, it would be best to think about limiting the time between each tick of chunk data to be safe. Signed-off-by: kingbri <bdashore3@proton.me>
63 lines
1.5 KiB
Python
63 lines
1.5 KiB
Python
import uvicorn
|
|
from fastapi import Depends, FastAPI
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from loguru import logger
|
|
|
|
from common.logger import UVICORN_LOG_CONFIG
|
|
from common.networking import add_request_id
|
|
from endpoints.OAI.router import router as OAIRouter
|
|
|
|
# Application instance shared by every entry point in this module.
# The request-ID dependency is registered app-wide through `dependencies`.
app = FastAPI(
    title="TabbyAPI",
    summary="An OAI compatible exllamav2 API that's both lightweight and fast",
    description=(
        "This docs page is not meant to send requests! Please use a service "
        "like Postman or a frontend UI."
    ),
    dependencies=[Depends(add_request_id)],
)
|
|
|
|
# Allow CORS requests: any origin, method and header, with credentials enabled
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
|
def setup_app():
    """Register the OAI router on the module-level app before startup."""

    app.include_router(router=OAIRouter)
|
|
|
|
|
|
def export_openapi():
    """Set up the app's routers and return its generated OpenAPI schema."""

    # Routers are included first so their routes are part of the schema
    setup_app()

    schema = app.openapi()
    return schema
|
|
|
|
|
|
async def start_api(host: str, port: int):
    """Log the API URLs, set up the routers, and serve the app with uvicorn.

    Args:
        host: Host passed to uvicorn and used in the logged URLs.
        port: Port passed to uvicorn and used in the logged URLs.
    """

    # TODO: Move OAI API to a separate folder
    logger.info(f"Developer documentation: http://{host}:{port}/redoc")
    logger.info(f"Completions: http://{host}:{port}/v1/completions")
    logger.info(f"Chat completions: http://{host}:{port}/v1/chat/completions")

    # Include the routers before handing the app to uvicorn
    setup_app()

    server = uvicorn.Server(
        uvicorn.Config(
            app,
            host=host,
            port=port,
            log_config=UVICORN_LOG_CONFIG,
        )
    )

    await server.serve()
|