OAI: Allow /v1/encode endpoint to handle vision requests

* More robust checks for OAI chat completion message lists on the /v1/encode endpoint
* Added TODO to support other aspects of chat completions
* Fix an oversight where `embeddings` was not defined in advance on the /v1/chat/completions endpoint
This commit is contained in:
DocShotgun
2024-11-19 11:14:37 -08:00
parent c42655336b
commit 5611365c07
4 changed files with 36 additions and 5 deletions

View File

@@ -862,7 +862,9 @@ class ExllamaV2Container:
async with self.load_condition:
self.load_condition.notify_all()
def encode_tokens(self, text: str, **kwargs):
def encode_tokens(
self, text: str, embeddings: MultimodalEmbeddingWrapper, **kwargs
):
"""Wrapper to encode tokens from a text string."""
return (
@@ -870,6 +872,7 @@ class ExllamaV2Container:
text,
add_bos=unwrap(kwargs.get("add_bos_token"), True),
encode_special_tokens=unwrap(kwargs.get("encode_special_tokens"), True),
embeddings=embeddings.content,
)
.flatten()
.tolist()