Allow for matching target resolution with control images for Qwen Image Edit 2509

This commit is contained in:
Jaret Burkett
2025-10-10 14:24:27 -06:00
parent 1bc6dee127
commit e9c4d94256
4 changed files with 42 additions and 8 deletions

View File

@@ -200,6 +200,11 @@ class QwenImageEditPlusModel(QwenImageModel):
):
with torch.no_grad():
batch_size, num_channels_latents, height, width = latent_model_input.shape
control_image_res = VAE_IMAGE_SIZE
if self.model_config.model_kwargs.get("match_target_res", False):
# use the current target size to set the control image res
control_image_res = height * width * self.pipeline.vae_scale_factor
# pack image tokens
latent_model_input = latent_model_input.view(
@@ -244,7 +249,7 @@ class QwenImageEditPlusModel(QwenImageModel):
if len(control_img.shape) == 3:
control_img = control_img.unsqueeze(0)
ratio = control_img.shape[2] / control_img.shape[3]
c_width = math.sqrt(VAE_IMAGE_SIZE * ratio)
c_width = math.sqrt(control_image_res * ratio)
c_height = c_width / ratio
c_width = round(c_width / 32) * 32

View File

@@ -214,6 +214,14 @@ export default function SimpleJob({
/>
</FormGroup>
)}
{modelArch?.additionalSections?.includes('model.qie.match_target_res') && (
<Checkbox
label="Match Target Res"
docKey="model.qie.match_target_res"
checked={jobConfig.config.process[0].model.model_kwargs.match_target_res}
onChange={value => setJobConfig(value, 'config.process[0].model.model_kwargs.match_target_res')}
/>
)}
{modelArch?.additionalSections?.includes('model.layer_offloading') && (
<>
<Checkbox

View File

@@ -21,7 +21,8 @@ type AdditionalSections =
| 'datasets.num_frames'
| 'model.multistage'
| 'model.layer_offloading'
| 'model.low_vram';
| 'model.low_vram'
| 'model.qie.match_target_res';
// String-literal union with exactly three members: 'image', 'instruction', and 'video'.
// NOTE(review): presumably the category assigned to each ModelArch entry — the field that uses it is not visible here; confirm.
type ModelGroup = 'image' | 'instruction' | 'video';
export interface ModelArch {
@@ -354,6 +355,12 @@ export const modelArchs: ModelArch[] = [
'config.process[0].train.noise_scheduler': ['flowmatch', 'flowmatch'],
'config.process[0].train.timestep_type': ['weighted', 'sigmoid'],
'config.process[0].model.qtype': ['qfloat8', 'qfloat8'],
'config.process[0].model.model_kwargs': [
{
match_target_res: false,
},
{},
],
},
disableSections: ['network.conv', 'train.unload_text_encoder'],
additionalSections: [
@@ -361,6 +368,7 @@ export const modelArchs: ModelArch[] = [
'sample.multi_ctrl_imgs',
'model.low_vram',
'model.layer_offloading',
'model.qie.match_target_res',
],
accuracyRecoveryAdapters: {
'3 bit with ARA': 'uint3|ostris/accuracy_recovery_adapters/qwen_image_edit_2509_torchao_uint3.safetensors',

View File

@@ -204,14 +204,27 @@ const docs: { [key: string]: ConfigDoc } = {
one update to the next. It will also only work with certain models.
<br />
<br />
Layer Offloading uses the CPU RAM instead of the GPU ram to hold most of the model weights. This allows training a
much larger model on a smaller GPU, assuming you have enough CPU RAM. This is slower than training on pure GPU
RAM, but CPU RAM is cheaper and upgradeable. You will still need GPU RAM to hold the optimizer states and LoRA weights,
so a larger card is usually still needed.
Layer Offloading uses the CPU RAM instead of the GPU RAM to hold most of the model weights. This allows training
a much larger model on a smaller GPU, assuming you have enough CPU RAM. This is slower than training on pure GPU
RAM, but CPU RAM is cheaper and upgradeable. You will still need GPU RAM to hold the optimizer states and LoRA
weights, so a larger card is usually still needed.
<br />
<br />
You can also select the percentage of the layers to offload. It is generally best to offload as few as possible (close to 0%)
for best performance, but you can offload more if you need the memory.
You can also select the percentage of the layers to offload. It is generally best to offload as few as possible
(close to 0%) for best performance, but you can offload more if you need the memory.
</>
),
},
'model.qie.match_target_res': {
title: 'Match Target Res',
description: (
<>
This setting will make the control images match the resolution of the target image. The official inference
example for Qwen-Image-Edit-2509 feeds the control image in at 1MP resolution, no matter what size you are
generating. Doing this makes training at lower res difficult because 1MP control images are fed in regardless of
how large your target image is. Match Target Res will resize the control images to match the resolution of your
target, allowing you to use less VRAM when training with smaller resolutions. You can still use different aspect
ratios; the image will just be resized to match the number of pixels in the target image.
</>
),
},