Model: Enable max_rq_tokens (output chunking)

This commit is contained in:
turboderp
2025-10-05 18:54:45 +02:00
parent e09a61969f
commit 52e093ae6c
4 changed files with 250 additions and 230 deletions

View File

@@ -109,6 +109,7 @@ class ModelLoadRequest(BaseModel):
)
cache_mode: Optional[str] = None
chunk_size: Optional[int] = None
disable_output_chunking: Optional[bool] = False
prompt_template: Optional[str] = None
vision: Optional[bool] = None