Added SigLIP 2 vision encoder for custom adapter

This commit is contained in:
Jaret Burkett
2025-03-09 00:14:44 +00:00
parent 51ad19b568
commit 850b8da6e5

View File

@@ -299,6 +299,15 @@ class CustomAdapter(torch.nn.Module):
self.vision_encoder = SiglipVisionModel.from_pretrained(
adapter_config.image_encoder_path,
ignore_mismatched_sizes=True).to(self.device, dtype=get_torch_dtype(self.sd_ref().dtype))
elif self.config.image_encoder_arch == 'siglip2':
from transformers import SiglipImageProcessor, SiglipVisionModel
try:
self.image_processor = SiglipImageProcessor.from_pretrained(adapter_config.image_encoder_path)
except EnvironmentError:
self.image_processor = SiglipImageProcessor()
self.vision_encoder = SiglipVisionModel.from_pretrained(
adapter_config.image_encoder_path,
ignore_mismatched_sizes=True).to(self.device, dtype=get_torch_dtype(self.sd_ref().dtype))
elif self.config.image_encoder_arch == 'pixtral':
self.image_processor = PixtralVisionImagePreprocessorCompatible(
max_image_size=self.config.pixtral_max_image_size,