diff --git a/common/multimodal.py b/common/multimodal.py
index 5b93f23..03bd93a 100644
--- a/common/multimodal.py
+++ b/common/multimodal.py
@@ -1,20 +1,20 @@
-from typing import List
 from backends.exllamav2.vision import get_image_embedding
 from common import model
 from loguru import logger
+from pydantic import BaseModel, Field
+from typing import List
 
 from common.optional_dependencies import dependencies
 
 if dependencies.exllamav2:
     from exllamav2 import ExLlamaV2VisionTower
 
-
-class MultimodalEmbeddingWrapper:
+class MultimodalEmbeddingWrapper(BaseModel):
     """Common multimodal embedding wrapper"""
 
     type: str = None
-    content: List = []
-    text_alias: List[str] = []
+    content: list = Field(default_factory=list)
+    text_alias: List[str] = Field(default_factory=list)
 
     async def add(self, url: str):
         # Determine the type of vision embedding to use