Allow for matching target resolution with control images for Qwen Image Edit 2509

2026-03-13 06:29:48 +00:00 · 2025-10-10 14:24:27 -06:00
parent 1bc6dee127
commit e9c4d94256
4 changed files with 42 additions and 8 deletions
--- a/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py
+++ b/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py
@@ -200,6 +200,11 @@ class QwenImageEditPlusModel(QwenImageModel):
    ):
        with torch.no_grad():
            batch_size, num_channels_latents, height, width = latent_model_input.shape
+            
+            control_image_res = VAE_IMAGE_SIZE
+            if self.model_config.model_kwargs.get("match_target_res", False):
+                # use the current target size to set the control image res
+                control_image_res = height * width * self.pipeline.vae_scale_factor

            # pack image tokens
            latent_model_input = latent_model_input.view(
@@ -244,7 +249,7 @@ class QwenImageEditPlusModel(QwenImageModel):
                        if len(control_img.shape) == 3:
                            control_img = control_img.unsqueeze(0)
                        ratio = control_img.shape[2] / control_img.shape[3]
-                        c_width = math.sqrt(VAE_IMAGE_SIZE * ratio)
+                        c_width = math.sqrt(control_image_res * ratio)
                        c_height = c_width / ratio

                        c_width = round(c_width / 32) * 32
--- a/ui/src/app/jobs/new/SimpleJob.tsx
+++ b/ui/src/app/jobs/new/SimpleJob.tsx
@@ -214,6 +214,14 @@ export default function SimpleJob({
                />
              </FormGroup>
            )}
+            {modelArch?.additionalSections?.includes('model.qie.match_target_res') && (
+                <Checkbox
+                  label="Match Target Res"
+                  docKey="model.qie.match_target_res"
+                  checked={jobConfig.config.process[0].model.model_kwargs.match_target_res}
+                  onChange={value => setJobConfig(value, 'config.process[0].model.model_kwargs.match_target_res')}
+                />
+            )}
            {modelArch?.additionalSections?.includes('model.layer_offloading') && (
              <>
                <Checkbox
--- a/ui/src/app/jobs/new/options.ts
+++ b/ui/src/app/jobs/new/options.ts
@@ -21,7 +21,8 @@ type AdditionalSections =
  | 'datasets.num_frames'
  | 'model.multistage'
  | 'model.layer_offloading'
-  | 'model.low_vram';
+  | 'model.low_vram'
+  | 'model.qie.match_target_res';
 type ModelGroup = 'image' | 'instruction' | 'video';

 export interface ModelArch {
@@ -354,6 +355,12 @@ export const modelArchs: ModelArch[] = [
      'config.process[0].train.noise_scheduler': ['flowmatch', 'flowmatch'],
      'config.process[0].train.timestep_type': ['weighted', 'sigmoid'],
      'config.process[0].model.qtype': ['qfloat8', 'qfloat8'],
+      'config.process[0].model.model_kwargs': [
+        {
+          match_target_res: false,
+        },
+        {},
+      ],
    },
    disableSections: ['network.conv', 'train.unload_text_encoder'],
    additionalSections: [
@@ -361,6 +368,7 @@ export const modelArchs: ModelArch[] = [
      'sample.multi_ctrl_imgs',
      'model.low_vram',
      'model.layer_offloading',
+      'model.qie.match_target_res',
    ],
    accuracyRecoveryAdapters: {
      '3 bit with ARA': 'uint3|ostris/accuracy_recovery_adapters/qwen_image_edit_2509_torchao_uint3.safetensors',
--- a/ui/src/docs.tsx
+++ b/ui/src/docs.tsx
@@ -204,14 +204,27 @@ const docs: { [key: string]: ConfigDoc } = {
        one update to the next. It will also only work with certain models.
        <br />
        <br />
-        Layer Offloading uses the CPU RAM instead of the GPU ram to hold most of the model weights. This allows training a
-        much larger model on a smaller GPU, assuming you have enough CPU RAM. This is slower than training on pure GPU
-        RAM, but CPU RAM is cheaper and upgradeable. You will still need GPU RAM to hold the optimizer states and LoRA weights, 
-        so a larger card is usually still needed.
+        Layer Offloading uses the CPU RAM instead of the GPU ram to hold most of the model weights. This allows training
+        a much larger model on a smaller GPU, assuming you have enough CPU RAM. This is slower than training on pure GPU
+        RAM, but CPU RAM is cheaper and upgradeable. You will still need GPU RAM to hold the optimizer states and LoRA
+        weights, so a larger card is usually still needed.
        <br />
        <br />
-        You can also select the percentage of the layers to offload. It is generally best to offload as few as possible (close to 0%) 
-        for best performance, but you can offload more if you need the memory.
+        You can also select the percentage of the layers to offload. It is generally best to offload as few as possible
+        (close to 0%) for best performance, but you can offload more if you need the memory.
+      </>
+    ),
+  },
+  'model.qie.match_target_res': {
+    title: 'Match Target Res',
+    description: (
+      <>
+        This setting will make the control images match the resolution of the target image. The official inference
+        example for Qwen-Image-Edit-2509 feeds the control image in at 1MP resolution, no matter what size you are
+        generating. Doing this makes training at lower res difficult because 1MP control images are fed in regardless of
+        how large your target image is. Match Target Res will match the resolution of your target to feed in the control
+        images, allowing you to use less VRAM when training with smaller resolutions. You can still use different aspect
+        ratios; the image will just be resized to match the number of pixels in the target image.
      </>
    ),
  },