Remove the vision model from the Qwen text encoder, as it is not currently needed for image generation

This commit is contained in:
Jaret Burkett
2025-08-06 11:40:02 -06:00
parent 14ccf2f3ce
commit 4c4a10d439

View File

@@ -130,6 +130,10 @@ class QwenImageModel(BaseModel):
text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained(
base_model_path, subfolder="text_encoder", torch_dtype=dtype
)
# remove the visual model as it is not needed for image generation
text_encoder.model.visual = None
text_encoder.to(self.device_torch, dtype=dtype)
flush()