Config: replace disable_output_chunking flag with output_chunking

2026-03-15 00:07:28 +00:00 · 2025-10-14 02:47:52 +02:00
parent 7eee3924c7
commit 8abdfe7b13
4 changed files with 11 additions and 10 deletions
--- a/config_sample.yml
+++ b/config_sample.yml
@@ -133,10 +133,10 @@ model:
  # An ideal value is between 512 and 4096.
  chunk_size: 2048

-  # Disable output chunking (default: false)
+  # Use output chunking (default: True)
+  # When true, cache space is allocated in chunks as needed; when false, it is allocated for the entire completion at once.
  # Used by EXL3 models only.
-  # If True, allocate space in the cache for the entire response with each request.
-  disable_output_chunking: false
+  output_chunking: true

  # Set the maximum number of prompts to process at one time (default: None/Automatic).
  # Automatically calculated if left blank.