Allow for matching target resolution with control images for Qwen Image Edit 2509

2026-04-30 11:11:37 +00:00 · 2025-10-10 14:24:27 -06:00
parent 1bc6dee127
commit e9c4d94256
4 changed files with 42 additions and 8 deletions
--- a/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py
+++ b/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py
@@ -200,6 +200,11 @@ class QwenImageEditPlusModel(QwenImageModel):
    ):
        with torch.no_grad():
            batch_size, num_channels_latents, height, width = latent_model_input.shape
            # Area budget used below to size the control images. Defaults to the
            # fixed VAE_IMAGE_SIZE unless the model is configured to follow the
            # target size.
            control_image_res = VAE_IMAGE_SIZE
            if self.model_config.model_kwargs.get("match_target_res", False):
                # use the current target size to set the control image res.
                # height/width are read from the latent tensor shape, so each
                # axis must be scaled back up before multiplying; scaling only
                # once would under-size the area by a factor of vae_scale_factor.
                # NOTE(review): assumes vae_scale_factor is a per-axis factor and
                # VAE_IMAGE_SIZE is a pixel area - confirm against the pipeline.
                vae_scale_factor = self.pipeline.vae_scale_factor
                control_image_res = (height * vae_scale_factor) * (
                    width * vae_scale_factor
                )
            # pack image tokens
            latent_model_input = latent_model_input.view(
@@ -244,7 +249,7 @@ class QwenImageEditPlusModel(QwenImageModel):
                        if len(control_img.shape) == 3:
                            control_img = control_img.unsqueeze(0)
                        ratio = control_img.shape[2] / control_img.shape[3]
-                        c_width = math.sqrt(VAE_IMAGE_SIZE * ratio)
+                        c_width = math.sqrt(control_image_res * ratio)
                        c_height = c_width / ratio
                        c_width = round(c_width / 32) * 32
--- a/ui/src/app/jobs/new/SimpleJob.tsx
+++ b/ui/src/app/jobs/new/SimpleJob.tsx
@@ -214,6 +214,14 @@ export default function SimpleJob({
                />
              </FormGroup>
            )}
            {modelArch?.additionalSections?.includes('model.qie.match_target_res') && (
                <Checkbox
                  label="Match Target Res"
                  docKey="model.qie.match_target_res"
                  checked={jobConfig.config.process[0].model.model_kwargs.match_target_res}
                  onChange={value => setJobConfig(value, 'config.process[0].model.model_kwargs.match_target_res')}
                />
            )}
            {modelArch?.additionalSections?.includes('model.layer_offloading') && (
              <>
                <Checkbox
--- a/ui/src/app/jobs/new/options.ts
+++ b/ui/src/app/jobs/new/options.ts
@@ -21,7 +21,8 @@ type AdditionalSections =
  | 'datasets.num_frames'
  | 'model.multistage'
  | 'model.layer_offloading'
-  | 'model.low_vram';
+  | 'model.low_vram'
  | 'model.qie.match_target_res';
 type ModelGroup = 'image' | 'instruction' | 'video';
 export interface ModelArch {
@@ -354,6 +355,12 @@ export const modelArchs: ModelArch[] = [
      'config.process[0].train.noise_scheduler': ['flowmatch', 'flowmatch'],
      'config.process[0].train.timestep_type': ['weighted', 'sigmoid'],
      'config.process[0].model.qtype': ['qfloat8', 'qfloat8'],
      'config.process[0].model.model_kwargs': [
        {
          match_target_res: false,
        },
        {},
      ],
    },
    disableSections: ['network.conv', 'train.unload_text_encoder'],
    additionalSections: [
@@ -361,6 +368,7 @@ export const modelArchs: ModelArch[] = [
      'sample.multi_ctrl_imgs',
      'model.low_vram',
      'model.layer_offloading',
      'model.qie.match_target_res',
    ],
    accuracyRecoveryAdapters: {
      '3 bit with ARA': 'uint3|ostris/accuracy_recovery_adapters/qwen_image_edit_2509_torchao_uint3.safetensors',
--- a/ui/src/docs.tsx
+++ b/ui/src/docs.tsx
@@ -204,14 +204,27 @@ const docs: { [key: string]: ConfigDoc } = {
        one update to the next. It will also only work with certain models.
        <br />
        <br />
-        Layer Offloading uses the CPU RAM instead of the GPU ram to hold most of the model weights. This allows training a
+        Layer Offloading uses the CPU RAM instead of the GPU ram to hold most of the model weights. This allows training
-        much larger model on a smaller GPU, assuming you have enough CPU RAM. This is slower than training on pure GPU
+        a much larger model on a smaller GPU, assuming you have enough CPU RAM. This is slower than training on pure GPU
-        RAM, but CPU RAM is cheaper and upgradeable. You will still need GPU RAM to hold the optimizer states and LoRA weights, 
+        RAM, but CPU RAM is cheaper and upgradeable. You will still need GPU RAM to hold the optimizer states and LoRA
-        so a larger card is usually still needed.
+        weights, so a larger card is usually still needed.
        <br />
        <br />
-        You can also select the percentage of the layers to offload. It is generally best to offload as few as possible (close to 0%) 
+        You can also select the percentage of the layers to offload. It is generally best to offload as few as possible
-        for best performance, but you can offload more if you need the memory.
+        (close to 0%) for best performance, but you can offload more if you need the memory.
      </>
    ),
  },
  'model.qie.match_target_res': {
    title: 'Match Target Res',
    description: (
      <>
        This setting will make the control images match the resolution of the target image. The official inference
        example for Qwen-Image-Edit-2509 feeds the control image in at 1MP resolution, no matter what size you are
        generating. Doing this makes training at lower res difficult because 1MP control images are fed in regardless
        of how large your target image is. Match Target Res will resize the control images to match the resolution of
        your target, allowing you to use less VRAM when training with smaller resolutions. You can still use different
        aspect ratios; the image will just be resized to match the number of pixels in the target image.
      </>
    ),
  },