Add Auto Memory for Qwen models in the UI

Jaret Burkett
2025-10-07 10:43:52 -06:00
parent 1ea50d8590
commit dfc85f0b51
5 changed files with 80 additions and 18 deletions

View File

@@ -18,6 +18,7 @@ import AddSingleImageModal, { openAddImageModal } from '@/components/AddSingleIm
import SampleControlImage from '@/components/SampleControlImage';
import { FlipHorizontal2, FlipVertical2 } from 'lucide-react';
import { handleModelArchChange } from './utils';
import { IoFlaskSharp } from 'react-icons/io5';
type Props = {
jobConfig: JobConfig;
@@ -213,6 +214,18 @@ export default function SimpleJob({
/>
</FormGroup>
)}
{modelArch?.additionalSections?.includes('model.auto_memory') && (
<Checkbox
label={
<>
Auto Memory <IoFlaskSharp className="inline text-yellow-500" title="Experimental" />{' '}
</>
}
checked={jobConfig.config.process[0].model.auto_memory || false}
onChange={value => setJobConfig(value, 'config.process[0].model.auto_memory')}
docKey="model.auto_memory"
/>
)}
</Card>
{disableSections.includes('model.quantize') ? null : (
<Card title="Quantization">

View File

@@ -20,6 +20,7 @@ type AdditionalSections =
| 'sample.multi_ctrl_imgs'
| 'datasets.num_frames'
| 'model.multistage'
| 'model.auto_memory'
| 'model.low_vram';
type ModelGroup = 'image' | 'instruction' | 'video';
@@ -312,7 +313,7 @@ export const modelArchs: ModelArch[] = [
'config.process[0].model.qtype': ['qfloat8', 'qfloat8'],
},
disableSections: ['network.conv'],
additionalSections: ['model.low_vram'],
additionalSections: ['model.low_vram', 'model.auto_memory'],
accuracyRecoveryAdapters: {
'3 bit with ARA': 'uint3|ostris/accuracy_recovery_adapters/qwen_image_torchao_uint3.safetensors',
},
@@ -333,7 +334,7 @@ export const modelArchs: ModelArch[] = [
'config.process[0].model.qtype': ['qfloat8', 'qfloat8'],
},
disableSections: ['network.conv'],
additionalSections: ['datasets.control_path', 'sample.ctrl_img', 'model.low_vram'],
additionalSections: ['datasets.control_path', 'sample.ctrl_img', 'model.low_vram', 'model.auto_memory'],
accuracyRecoveryAdapters: {
'3 bit with ARA': 'uint3|ostris/accuracy_recovery_adapters/qwen_image_edit_torchao_uint3.safetensors',
},
@@ -355,7 +356,12 @@ export const modelArchs: ModelArch[] = [
'config.process[0].model.qtype': ['qfloat8', 'qfloat8'],
},
disableSections: ['network.conv', 'train.unload_text_encoder'],
additionalSections: ['datasets.multi_control_paths', 'sample.multi_ctrl_imgs', 'model.low_vram'],
additionalSections: [
'datasets.multi_control_paths',
'sample.multi_ctrl_imgs',
'model.low_vram',
'model.auto_memory',
],
accuracyRecoveryAdapters: {
'3 bit with ARA': 'uint3|ostris/accuracy_recovery_adapters/qwen_image_edit_2509_torchao_uint3.safetensors',
},
@@ -465,7 +471,6 @@ export const groupedModelOptions: GroupedSelectOption[] = modelArchs.reduce((acc
export const quantizationOptions: SelectOption[] = [
{ value: '', label: '- NONE -' },
{ value: 'qfloat8', label: 'float8 (default)' },
{ value: 'uint8', label: '8 bit' },
{ value: 'uint7', label: '7 bit' },
{ value: 'uint6', label: '6 bit' },
{ value: 'uint5', label: '5 bit' },

View File

@@ -21,6 +21,20 @@ export const handleModelArchChange = (
setJobConfig(false, 'config.process[0].model.low_vram');
}
// handle auto memory setting
if (!newArch?.additionalSections?.includes('model.auto_memory')) {
if ('auto_memory' in jobConfig.config.process[0].model) {
const newModel = objectCopy(jobConfig.config.process[0].model);
delete newModel.auto_memory;
setJobConfig(newModel, 'config.process[0].model');
}
} else {
// set to false if not set
if (!('auto_memory' in jobConfig.config.process[0].model)) {
setJobConfig(false, 'config.process[0].model.auto_memory');
}
}
// revert defaults from previous model
for (const key in currentArch.defaults) {
setJobConfig(currentArch.defaults[key][1], key);
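The new block enforces a simple invariant: architectures that do not list 'model.auto_memory' must not carry a stale auto_memory key, while architectures that do list it get an explicit false default so the checkbox stays controlled. A minimal standalone sketch of the same logic (the simplified type and function name are hypothetical, not helpers from this codebase):

type ModelLike = { auto_memory?: boolean; [key: string]: unknown };

function normalizeAutoMemory(model: ModelLike, additionalSections: string[]): ModelLike {
  const next = { ...model }; // shallow copy, like objectCopy above
  if (!additionalSections.includes('model.auto_memory')) {
    delete next.auto_memory; // unsupported arch: drop the key entirely
  } else if (!('auto_memory' in next)) {
    next.auto_memory = false; // supported arch: default to off
  }
  return next;
}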

View File

@@ -1,5 +1,6 @@
import React from 'react';
import { ConfigDoc } from '@/types';
import { IoFlaskSharp } from 'react-icons/io5';
const docs: { [key: string]: ConfigDoc } = {
'config.name': {
@@ -56,7 +57,7 @@ const docs: { [key: string]: ConfigDoc } = {
description: (
<>
The control dataset needs to have files that match the filenames of your training dataset. They should be
matching file pairs. These images are fed as control/input images during training. The control images will be
resized to match the training images.
</>
),
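The pairing is purely by filename; an illustrative layout (paths hypothetical):

datasets/person/img_001.jpg
datasets/person/img_002.jpg
datasets/person_control/img_001.jpg (control input fed for img_001.jpg)
datasets/person_control/img_002.jpg (control input fed for img_002.jpg)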
@@ -71,8 +72,8 @@ const docs: { [key: string]: ConfigDoc } = {
<br />
For multi control datasets, the controls will all be applied in the order they are listed. If the model does not
require the images to be the same aspect ratios, such as with Qwen/Qwen-Image-Edit-2509, then the control images
do not need to match the size or aspect ratio of the target image and they will be automatically resized to
the ideal resolutions for the model / target images.
do not need to match the size or aspect ratio of the target image and they will be automatically resized
to the ideal resolutions for the model / target images.
</>
),
},
@@ -110,13 +111,15 @@ const docs: { [key: string]: ConfigDoc } = {
title: 'Flip X and Flip Y',
description: (
<>
You can augment your dataset on the fly by flipping the x (horizontal) and/or y (vertical) axis. Flipping a single axis will effectively double your dataset.
It will result in training on normal images and the flipped versions of the images. This can be very helpful, but keep in mind it can also
be destructive. There is no reason to train people upside down, and flipping a face can confuse the model as a person's right side does not
You can augment your dataset on the fly by flipping the x (horizontal) and/or y (vertical) axis. Flipping a
single axis will effectively double your dataset. It will result in training on normal images and the flipped
versions of the images. This can be very helpful, but keep in mind it can also be destructive. There is no
reason to train people upside down, and flipping a face can confuse the model as a person's right side does not
look identical to their left side. Flipping text is obviously not a good idea either.
<br />
<br />
Control images for a dataset will also be flipped to match the images, so they will always match on the pixel level.
Control images for a dataset will also be flipped to match the images, so they will always match on the pixel
level.
</>
),
},
@@ -148,8 +151,8 @@ const docs: { [key: string]: ConfigDoc } = {
Some models have multi-stage networks that are trained and used separately in the denoising process. Most
commonly, there are 2 stages: one for high noise and one for low noise. You can choose to train both stages at
once or train them separately. If trained at the same time, the trainer will alternate between training each
model every so many steps and will output 2 different LoRAs. If you choose to train only one stage, the
trainer will only train that stage and output a single LoRA.
model every so many steps and will output 2 different LoRAs. If you choose to train only one stage, the trainer
will only train that stage and output a single LoRA.
</>
),
},
@@ -175,10 +178,36 @@ const docs: { [key: string]: ConfigDoc } = {
title: 'Force First Sample',
description: (
<>
This option will force the trainer to generate samples when it starts. The trainer will normally only generate a first sample
when nothing has been trained yet, but will not do a first sample when resuming from an existing checkpoint. This option
forces a first sample every time the trainer is started. This can be useful if you have changed sample prompts and want to see
the new prompts right away.
This option will force the trainer to generate samples when it starts. The trainer will normally only generate a
first sample when nothing has been trained yet, but will not do a first sample when resuming from an existing
checkpoint. This option forces a first sample every time the trainer is started. This can be useful if you have
changed sample prompts and want to see the new prompts right away.
</>
),
},
'model.auto_memory': {
title: (
<>
Auto Memory{' '}
<span className="text-yellow-500">
(<IoFlaskSharp className="inline text-yellow-500" title="Experimental" /> Experimental)
</span>
</>
),
description: (
<>
This is an experimental feature based on{' '}
<a className="text-blue-500" href="https://github.com/lodestone-rock/RamTorch" target="_blank">
RamTorch
</a>
. This feature is in an early state and will see many updates and changes, so be aware it may not work consistently from
one update to the next. It will also only work with certain models.
<br />
<br />
Auto Memory uses CPU RAM instead of GPU RAM to hold most of the model weights. This allows training a
much larger model on a smaller GPU, assuming you have enough CPU RAM. This is slower than training purely in
GPU RAM, but CPU RAM is cheaper and upgradeable. You will still need GPU RAM to hold the optimizer states and
LoRA weights, so a card with a reasonable amount of VRAM is usually still needed.
</>
),
},
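For reference, turning the option on just sets a boolean on the model block of the job config. A minimal sketch of the resulting shape (the arch id and other values here are illustrative; the field itself matches the ModelConfig interface in the next file):

const model = {
  arch: 'qwen_image', // illustrative: one of the Qwen archs that opts in via additionalSections
  low_vram: false,
  auto_memory: true, // experimental RamTorch-backed CPU offload
};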

View File

@@ -153,6 +153,7 @@ export interface ModelConfig {
arch: string;
low_vram: boolean;
model_kwargs: { [key: string]: any };
auto_memory?: boolean;
}
export interface SampleItem {
@@ -231,7 +232,7 @@ export interface JobConfig {
}
export interface ConfigDoc {
title: string;
title: React.ReactNode;
description: React.ReactNode;
}