diff --git a/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py b/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py index d8cfec01..14d1e6ea 100644 --- a/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py +++ b/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py @@ -200,6 +200,11 @@ class QwenImageEditPlusModel(QwenImageModel): ): with torch.no_grad(): batch_size, num_channels_latents, height, width = latent_model_input.shape + + control_image_res = VAE_IMAGE_SIZE + if self.model_config.model_kwargs.get("match_target_res", False): + # use the current target size to set the control image res + control_image_res = height * width * self.pipeline.vae_scale_factor # pack image tokens latent_model_input = latent_model_input.view( @@ -244,7 +249,7 @@ class QwenImageEditPlusModel(QwenImageModel): if len(control_img.shape) == 3: control_img = control_img.unsqueeze(0) ratio = control_img.shape[2] / control_img.shape[3] - c_width = math.sqrt(VAE_IMAGE_SIZE * ratio) + c_width = math.sqrt(control_image_res * ratio) c_height = c_width / ratio c_width = round(c_width / 32) * 32 diff --git a/ui/src/app/jobs/new/SimpleJob.tsx b/ui/src/app/jobs/new/SimpleJob.tsx index 964a7309..c13aaf4e 100644 --- a/ui/src/app/jobs/new/SimpleJob.tsx +++ b/ui/src/app/jobs/new/SimpleJob.tsx @@ -214,6 +214,14 @@ export default function SimpleJob({ /> )} + {modelArch?.additionalSections?.includes('model.qie.match_target_res') && ( + setJobConfig(value, 'config.process[0].model.model_kwargs.match_target_res')} + /> + )} {modelArch?.additionalSections?.includes('model.layer_offloading') && ( <>
- Layer Offloading uses the CPU RAM instead of the GPU ram to hold most of the model weights. This allows training a - much larger model on a smaller GPU, assuming you have enough CPU RAM. This is slower than training on pure GPU - RAM, but CPU RAM is cheaper and upgradeable. You will still need GPU RAM to hold the optimizer states and LoRA weights, - so a larger card is usually still needed. + Layer Offloading uses the CPU RAM instead of the GPU RAM to hold most of the model weights. This allows training + a much larger model on a smaller GPU, assuming you have enough CPU RAM. This is slower than training on pure GPU + RAM, but CPU RAM is cheaper and upgradeable. You will still need GPU RAM to hold the optimizer states and LoRA + weights, so a larger card is usually still needed.

- You can also select the percentage of the layers to offload. It is generally best to offload as few as possible (close to 0%) - for best performance, but you can offload more if you need the memory. + You can also select the percentage of the layers to offload. It is generally best to offload as few as possible + (close to 0%) for best performance, but you can offload more if you need the memory. + + ), + }, + 'model.qie.match_target_res': { + title: 'Match Target Res', + description: ( + <> + This setting will make the control images match the resolution of the target image. The official inference + example for Qwen-Image-Edit-2509 feeds the control image at 1MP resolution, no matter what size you are + generating. Doing this makes training at lower res difficult because 1MP control images are fed in regardless of + how large your target image is. Match Target Res will match the resolution of your target to feed in the control + images, allowing you to use less VRAM when training with smaller resolutions. You can still use different aspect + ratios, the image will just be resized to match the number of pixels in the target image. + </> + ), + },