Config: replace disable_output_chunking flag with output_chunking

This commit is contained in:
turboderp
2025-10-14 02:47:52 +02:00
parent 7eee3924c7
commit 8abdfe7b13
4 changed files with 11 additions and 10 deletions

View File

@@ -250,8 +250,8 @@ class ExllamaV3Container(BaseModelContainer):
self.chunk_size = self.adjust_chunk_size(user_chunk_size)
# Output chunking
disable_output_chunking = unwrap(kwargs.get("disable_output_chunking"), False)
self.max_rq_tokens = None if disable_output_chunking else self.chunk_size
output_chunking = unwrap(kwargs.get("output_chunking"), True)
self.max_rq_tokens = self.chunk_size if output_chunking else None
# Template setup
self.prompt_template = await find_prompt_template(

View File

@@ -265,12 +265,13 @@ class ModelConfig(BaseConfigModel):
),
gt=0,
)
disable_output_chunking: Optional[bool] = Field(
False,
output_chunking: Optional[bool] = Field(
True,
description=(
"Disable output chunking (default: false).\n"
"Use output chunking (default: True).\n"
"Instead of allocating cache space for the entire completion at once, "
"allocate in chunks as needed.\n"
"Used by EXL3 models only.\n"
"If False, allocate space in the cache for the entire response with each request.\n"
),
)
max_batch_size: Optional[int] = Field(

View File

@@ -133,10 +133,10 @@ model:
# An ideal value is between 512 and 4096.
chunk_size: 2048
# Disable output chunking (default: false)
# Use output chunking (default: True)
# Instead of allocating cache space for the entire completion at once, allocate in chunks as needed.
# Used by EXL3 models only.
# If False, allocate space in the cache for the entire response with each request.
disable_output_chunking: false
output_chunking: true
# Set the maximum number of prompts to process at one time (default: None/Automatic).
# Automatically calculated if left blank.

View File

@@ -109,7 +109,7 @@ class ModelLoadRequest(BaseModel):
)
cache_mode: Optional[str] = None
chunk_size: Optional[int] = None
disable_output_chunking: Optional[bool] = False
output_chunking: Optional[bool] = True
prompt_template: Optional[str] = None
vision: Optional[bool] = None