mirror of
https://github.com/ostris/ai-toolkit.git
synced 2026-01-26 16:39:47 +00:00
Allow for matching target resolution with control images for Qwen Image Edit 2509
This commit is contained in:
@@ -214,6 +214,14 @@ export default function SimpleJob({
|
||||
/>
|
||||
</FormGroup>
|
||||
)}
|
||||
{modelArch?.additionalSections?.includes('model.qie.match_target_res') && (
|
||||
<Checkbox
|
||||
label="Match Target Res"
|
||||
docKey="model.qie.match_target_res"
|
||||
checked={jobConfig.config.process[0].model.model_kwargs.match_target_res}
|
||||
onChange={value => setJobConfig(value, 'config.process[0].model.model_kwargs.match_target_res')}
|
||||
/>
|
||||
)}
|
||||
{modelArch?.additionalSections?.includes('model.layer_offloading') && (
|
||||
<>
|
||||
<Checkbox
|
||||
|
||||
@@ -21,7 +21,8 @@ type AdditionalSections =
|
||||
| 'datasets.num_frames'
|
||||
| 'model.multistage'
|
||||
| 'model.layer_offloading'
|
||||
| 'model.low_vram';
|
||||
| 'model.low_vram'
|
||||
| 'model.qie.match_target_res';
|
||||
type ModelGroup = 'image' | 'instruction' | 'video';
|
||||
|
||||
export interface ModelArch {
|
||||
@@ -354,6 +355,12 @@ export const modelArchs: ModelArch[] = [
|
||||
'config.process[0].train.noise_scheduler': ['flowmatch', 'flowmatch'],
|
||||
'config.process[0].train.timestep_type': ['weighted', 'sigmoid'],
|
||||
'config.process[0].model.qtype': ['qfloat8', 'qfloat8'],
|
||||
'config.process[0].model.model_kwargs': [
|
||||
{
|
||||
match_target_res: false,
|
||||
},
|
||||
{},
|
||||
],
|
||||
},
|
||||
disableSections: ['network.conv', 'train.unload_text_encoder'],
|
||||
additionalSections: [
|
||||
@@ -361,6 +368,7 @@ export const modelArchs: ModelArch[] = [
|
||||
'sample.multi_ctrl_imgs',
|
||||
'model.low_vram',
|
||||
'model.layer_offloading',
|
||||
'model.qie.match_target_res',
|
||||
],
|
||||
accuracyRecoveryAdapters: {
|
||||
'3 bit with ARA': 'uint3|ostris/accuracy_recovery_adapters/qwen_image_edit_2509_torchao_uint3.safetensors',
|
||||
|
||||
@@ -204,14 +204,27 @@ const docs: { [key: string]: ConfigDoc } = {
|
||||
one update to the next. It will also only work with certain models.
|
||||
<br />
|
||||
<br />
|
||||
Layer Offloading uses the CPU RAM instead of the GPU ram to hold most of the model weights. This allows training a
|
||||
much larger model on a smaller GPU, assuming you have enough CPU RAM. This is slower than training on pure GPU
|
||||
RAM, but CPU RAM is cheaper and upgradeable. You will still need GPU RAM to hold the optimizer states and LoRA weights,
|
||||
so a larger card is usually still needed.
|
||||
Layer Offloading uses the CPU RAM instead of the GPU RAM to hold most of the model weights. This allows training
|
||||
a much larger model on a smaller GPU, assuming you have enough CPU RAM. This is slower than training on pure GPU
|
||||
RAM, but CPU RAM is cheaper and upgradeable. You will still need GPU RAM to hold the optimizer states and LoRA
|
||||
weights, so a larger card is usually still needed.
|
||||
<br />
|
||||
<br />
|
||||
You can also select the percentage of the layers to offload. It is generally best to offload as few as possible (close to 0%)
|
||||
for best performance, but you can offload more if you need the memory.
|
||||
You can also select the percentage of the layers to offload. It is generally best to offload as few as possible
|
||||
(close to 0%) for best performance, but you can offload more if you need the memory.
|
||||
</>
|
||||
),
|
||||
},
|
||||
'model.qie.match_target_res': {
|
||||
title: 'Match Target Res',
|
||||
description: (
|
||||
<>
|
||||
This setting will make the control images match the resolution of the target image. The official inference
|
||||
example for Qwen-Image-Edit-2509 feeds the control image at 1MP resolution, no matter what size you are
|
||||
generating. Doing this makes training at lower res difficult because 1MP control images are fed in despite how
|
||||
large your target image is. Match Target Res will match the resolution of your target to feed in the control
|
||||
images allowing you to use less VRAM when training with smaller resolutions. You can still use different aspect
|
||||
ratios, the image will just be resized to match the number of pixels in the target image.
|
||||
</>
|
||||
),
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user