diff --git a/extensions_built_in/diffusion_models/qwen_image/qwen_image.py b/extensions_built_in/diffusion_models/qwen_image/qwen_image.py index f7607ccb..bcd42ed3 100644 --- a/extensions_built_in/diffusion_models/qwen_image/qwen_image.py +++ b/extensions_built_in/diffusion_models/qwen_image/qwen_image.py @@ -130,6 +130,10 @@ class QwenImageModel(BaseModel): text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained( base_model_path, subfolder="text_encoder", torch_dtype=dtype ) + + # remove the visual model as it is not needed for image generation + text_encoder.model.visual = None + text_encoder.to(self.device_torch, dtype=dtype) flush()