# tabbyAPI/common/errors.py

class ContextLengthExceededError(ValueError):
    """Signal that a request does not fit the model's context or cache.

    It subclasses ``ValueError``, so existing ``except ValueError`` handlers
    keep catching it, while callers that care can match this narrower type.
    """


def validate_context_requirements(
    context_len: int,
    max_seq_len: int,
    max_tokens: int,
    cache_capacity: int,
    max_rq_tokens: int | None = None,
    allocation_boundary: int = 256,
):
    """Validate the initial cache allocation required by an ExLlamaV3 job."""

    if context_len > max_seq_len:
        raise ContextLengthExceededError(
            f"Prompt length {context_len} is greater than max_seq_len {max_seq_len}"
        )

    if max_tokens <= 0:
        max_tokens = max_seq_len - context_len - 1

    if max_rq_tokens is not None:
        required_tokens = (
            (context_len - 1 + max_rq_tokens + allocation_boundary - 1) // allocation_boundary
        ) * allocation_boundary
    else:
        required_tokens = context_len + max_tokens

    if required_tokens > cache_capacity:
        raise ContextLengthExceededError(
            f"Initial job allocation requires {required_tokens} cache tokens, "
            f"which exceeds the available context size of {cache_capacity} tokens"
        )