Config: replace disable_output_chunking flag with output_chunking

2026-04-20 14:28:54 +00:00 · 2025-10-14 02:47:52 +02:00
parent 7eee3924c7
commit 8abdfe7b13
4 changed files with 11 additions and 10 deletions
--- a/common/config_models.py
+++ b/common/config_models.py
@@ -265,12 +265,13 @@ class ModelConfig(BaseConfigModel):
        ),
        gt=0,
    )
-    disable_output_chunking: Optional[bool] = Field(
-        False,
+    output_chunking: Optional[bool] = Field(
+        True,
        description=(
-            "Disable output chunking (default: false).\n"
+            "Use output chunking (default: True)\n"
+            "Instead of allocating cache space for the entire completion at once, "
+            "allocate in chunks as needed.\n"
            "Used by EXL3 models only.\n"
-            "True, allocate space in the cache for the entire response with each request..\n"
        ),
    )
    max_batch_size: Optional[int] = Field(