This commit is contained in:
turboderp
2025-10-15 10:40:19 +02:00
parent ad64942fa1
commit 0af29d957a
2 changed files with 8 additions and 16 deletions

View File

@@ -1336,14 +1336,9 @@ class ExllamaV2Container(BaseModelContainer):
 negative_context_len = input_ids[1].size(dim=-1) if negative_prompt else 0
 # Automatically set max_tokens to fill up the context
 # This should be an OK default, but may be changed in the future
-max_tokens = unwrap(
-    params.max_tokens,
-    self.config.max_seq_len - max(context_len, negative_context_len),
-)
-if max_tokens < 1:
-    logger.warning("max_tokens must be a positive integer, setting to 1.")
-    max_tokens = 1
+max_tokens = unwrap(params.max_tokens, 0)
+if max_tokens <= 0:
+    max_tokens = self.config.max_seq_len - max(context_len, negative_context_len)
 # Determine if the negative context or the context length is bigger
 context_to_check = max(negative_context_len, context_len)

View File

@@ -951,14 +951,11 @@ class ExllamaV3Container(BaseModelContainer):
 # The first index will always be the positive prompt
 context_len = input_ids[0].size(dim=-1)
-# Automatically set max_tokens to fill up the context
-max_tokens = unwrap(
-    params.max_tokens if params.max_tokens > 0 else None,
-    self.max_seq_len - context_len - 1,
-)
-if max_tokens < 1:
-    logger.warning("max_tokens must be a positive integer, setting to 1.")
-    max_tokens = 1
+# Unless specified in the request, automatically set max_tokens to fill up
+# the context
+max_tokens = unwrap(params.max_tokens, 0)
+if max_tokens <= 0:
+    max_tokens = self.max_seq_len - context_len - 1
 # Check total length of prompt against max context length
 if context_len > self.max_seq_len: