Remove the vision model from the Qwen text encoder, as it is not currently needed for image generation

2026-03-10 13:09:51 +00:00 · 2025-08-06 11:40:02 -06:00
parent 14ccf2f3ce
commit 4c4a10d439
1 changed files with 4 additions and 0 deletions
--- a/extensions_built_in/diffusion_models/qwen_image/qwen_image.py
+++ b/extensions_built_in/diffusion_models/qwen_image/qwen_image.py
@@ -130,6 +130,10 @@ class QwenImageModel(BaseModel):
        text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            base_model_path, subfolder="text_encoder", torch_dtype=dtype
        )
+        
+        # remove the visual model as it is not needed for image generation
+        text_encoder.model.visual = None
+        
        text_encoder.to(self.device_torch, dtype=dtype)
        flush()