OAI: Allow /v1/encode endpoint to handle vision requests

* More robust checks for OAI chat completion message lists on the /v1/encode endpoint
* Added TODO to support other aspects of chat completions
* Fix an oversight where `embeddings` was not defined in advance on the /v1/chat/completions endpoint
This commit is contained in:
DocShotgun
2024-11-19 11:14:37 -08:00
parent c42655336b
commit 5611365c07
4 changed files with 36 additions and 5 deletions

View File

@@ -862,7 +862,9 @@ class ExllamaV2Container:
async with self.load_condition:
self.load_condition.notify_all()
def encode_tokens(self, text: str, **kwargs):
def encode_tokens(
self, text: str, embeddings: MultimodalEmbeddingWrapper, **kwargs
):
"""Wrapper to encode tokens from a text string."""
return (
@@ -870,6 +872,7 @@ class ExllamaV2Container:
text,
add_bos=unwrap(kwargs.get("add_bos_token"), True),
encode_special_tokens=unwrap(kwargs.get("encode_special_tokens"), True),
embeddings=embeddings.content,
)
.flatten()
.tolist()