Files
tabbyAPI/common/errors.py
2026-06-14 16:15:54 +02:00

35 lines
1.1 KiB
Python

class ContextLengthExceededError(ValueError):
    """Signal that a tokenized prompt does not fit the loaded model's context limit.

    This is a ``ValueError`` subclass, so handlers written for ``ValueError``
    also catch it.
    """
def validate_context_requirements(
context_len: int,
max_seq_len: int,
max_tokens: int,
cache_capacity: int,
max_rq_tokens: int | None = None,
allocation_boundary: int = 256,
):
"""Validate the initial cache allocation required by an ExLlamaV3 job."""
if context_len > max_seq_len:
raise ContextLengthExceededError(
f"Prompt length {context_len} is greater than max_seq_len {max_seq_len}"
)
if max_tokens <= 0:
max_tokens = max_seq_len - context_len - 1
if max_rq_tokens is not None:
required_tokens = (
(context_len - 1 + max_rq_tokens + allocation_boundary - 1) // allocation_boundary
) * allocation_boundary
else:
required_tokens = context_len + max_tokens
if required_tokens > cache_capacity:
raise ContextLengthExceededError(
f"Initial job allocation requires {required_tokens} cache tokens, "
f"which exceeds the available context size of {cache_capacity} tokens"
)