mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-15 00:07:28 +00:00
Model: Enable max_rq_tokens (output chunking)
This commit is contained in:
@@ -265,6 +265,14 @@ class ModelConfig(BaseConfigModel):
|
||||
),
|
||||
gt=0,
|
||||
)
|
||||
disable_output_chunking: Optional[bool] = Field(
|
||||
False,
|
||||
description=(
|
||||
"Disable output chunking (default: false).\n"
|
||||
"Used by EXL3 models only.\n"
|
||||
"If true, allocate space in the cache for the entire response with each request.\n"
|
||||
),
|
||||
)
|
||||
max_batch_size: Optional[int] = Field(
|
||||
None,
|
||||
description=(
|
||||
|
||||
Reference in New Issue
Block a user