Mirror of https://github.com/theroyallab/tabbyAPI.git, synced 2026-03-15 00:07:28 +00:00.

Fix #390

This commit is contained in:
@@ -1336,14 +1336,9 @@ class ExllamaV2Container(BaseModelContainer):
     negative_context_len = input_ids[1].size(dim=-1) if negative_prompt else 0

-    # Automatically set max_tokens to fill up the context
-    # This should be an OK default, but may be changed in the future
-    max_tokens = unwrap(
-        params.max_tokens,
-        self.config.max_seq_len - max(context_len, negative_context_len),
-    )
-    if max_tokens < 1:
-        logger.warning("max_tokens must be a positive integer, setting to 1.")
-        max_tokens = 1
+    max_tokens = unwrap(params.max_tokens, 0)
+    if max_tokens <= 0:
+        max_tokens = self.config.max_seq_len - max(context_len, negative_context_len)

     # Determine if the negative context or the context length is bigger
     context_to_check = max(negative_context_len, context_len)
||||
@@ -951,14 +951,11 @@ class ExllamaV3Container(BaseModelContainer):
     # The first index will always be the positive prompt
     context_len = input_ids[0].size(dim=-1)

-    # Automatically set max_tokens to fill up the context
-    max_tokens = unwrap(
-        params.max_tokens if params.max_tokens > 0 else None,
-        self.max_seq_len - context_len - 1,
-    )
-    if max_tokens < 1:
-        logger.warning("max_tokens must be a positive integer, setting to 1.")
-        max_tokens = 1
+    # Unless specified in the request, automatically set max_tokens to fill up
+    # the context
+    max_tokens = unwrap(params.max_tokens, 0)
+    if max_tokens <= 0:
+        max_tokens = self.max_seq_len - context_len - 1

     # Check total length of prompt against max context length
     if context_len > self.max_seq_len:
||||
Reference in New Issue
Block a user