Files
tabbyAPI/backends/exllamav3/vision.py
turboderp 79d581e1f5 OAI endpoints: More rework
- remove disconnect_task
- move disconnect logic to a per-request handler that wraps cleanup operation and directly polls the request state with throttling
- exclusively signal disconnect with CancelledError
- rework completions endpoint to follow same approach as chat completions, share some code
- refactor OAI endpoints a bit
- correct behavior for batched completion requests
- make sure logprobs work for completion and streaming completion requests
- more tests
2026-04-02 01:26:44 +02:00

63 lines
1.7 KiB
Python

"""Vision utilities for ExLlamaV3."""
from common import model
from common.optional_dependencies import dependencies
from common.image_util import get_image
from common.logger import xlogger
# Since this is used outside the Exl3 backend, the dependency
# may be optional
if dependencies.exllamav3:
from exllamav3.tokenizer import MMEmbedding
from collections import OrderedDict
from hashlib import blake2b
from typing import OrderedDict as OrderedDictType
# Maximum number of embeddings kept in the cache; when an insert pushes the
# size past this value, get_image_embedding_exl3 evicts the oldest entry.
_EMBEDDING_CACHE_CAPACITY = 32
# Ordered cache keyed by the 16-byte BLAKE2b digest of the image URL.
# Each value is (original URL, embedding); the URL is stored alongside the
# embedding so a lookup can confirm the entry belongs to the requested URL.
# Entries are moved to the end on hit and on store, so the front of the
# mapping is always the least recently used entry.
_embedding_cache: OrderedDictType[bytes, tuple[str, "MMEmbedding"]] = OrderedDict()
def _image_key_128(s: str) -> bytes:
    """Return a 128-bit (16-byte) BLAKE2b digest of *s*.

    The string is encoded as UTF-8 before hashing. The digest is used as a
    compact, fixed-size key for the embedding cache.
    """
    hasher = blake2b(digest_size=16)
    hasher.update(s.encode("utf-8"))
    return hasher.digest()
async def get_image_embedding_exl3(url: str) -> "MMEmbedding":
    """Return the multimodal embedding for the image at *url*, using the cache.

    The URL is hashed to a 16-byte key and looked up in the module-level
    cache. A hit is only accepted when the URL stored with the entry equals
    *url*; the entry is then marked as most recently used and returned.

    On a miss (or a key match with a different URL), the image is obtained
    via ``get_image``, embedded with the loaded vision model and tokenizer,
    stored in the cache, and the oldest entry is evicted if the cache has
    grown past ``_EMBEDDING_CACHE_CAPACITY``.

    Args:
        url: Image location string, passed unchanged to ``get_image``.

    Returns:
        The cached or newly computed embedding.
    """
    digest = _image_key_128(url)

    # Fast path: accept the cached entry only if it was stored for this
    # exact URL, so two URLs sharing a digest never share an embedding.
    entry = _embedding_cache.get(digest)
    if entry is not None and entry[0] == url:
        _embedding_cache.move_to_end(digest)
        return entry[1]

    # Slow path: load the image and run it through the vision model.
    image = await get_image(url)
    container = model.container
    result = container.vision_model.get_image_embeddings(
        tokenizer=container.tokenizer,
        image=image,
        text_alias=None,
    )

    # Store (replacing any entry under the same key) and mark as newest.
    _embedding_cache[digest] = (url, result)
    _embedding_cache.move_to_end(digest)

    # Drop the least recently used entry once over capacity.
    if len(_embedding_cache) > _EMBEDDING_CACHE_CAPACITY:
        _embedding_cache.popitem(last=False)

    details = {
        "text_alias": result.text_alias,
        "metadata": result.metadata,
        "token_length": result.mm_length,
        "cache_size": len(_embedding_cache),
    }
    xlogger.debug("Stored MMEmbedding", details)

    return result
def clear_image_embedding_cache():
    """Remove every entry from the module-level image embedding cache."""
    _embedding_cache.clear()