diff --git a/common/multimodal.py b/common/multimodal.py index 5b93f23..03bd93a 100644 --- a/common/multimodal.py +++ b/common/multimodal.py @@ -1,20 +1,20 @@ -from typing import List from backends.exllamav2.vision import get_image_embedding from common import model from loguru import logger +from pydantic import BaseModel, Field +from typing import List from common.optional_dependencies import dependencies if dependencies.exllamav2: from exllamav2 import ExLlamaV2VisionTower - -class MultimodalEmbeddingWrapper: +class MultimodalEmbeddingWrapper(BaseModel): """Common multimodal embedding wrapper""" type: str = None - content: List = [] - text_alias: List[str] = [] + content: list = Field(default_factory=list) + text_alias: List[str] = Field(default_factory=list) async def add(self, url: str): # Determine the type of vision embedding to use