mirror of
https://github.com/ostris/ai-toolkit.git
synced 2026-04-26 17:29:27 +00:00
Add LTX-2 Support (#644)
* WIP, adding support for LTX2 * Training on images working * Fix loading comfy models * Handle converting and deconverting lora so it matches original format * Reworked ui to handle ltx and proper dataset default overwriting. * Update the way lokr saves so it is more compatible with comfy * Audio loading and synchronization/resampling is working * Add audio to training. Does it work? Maybe, still testing. * Fixed fps default issue for sound * Have ui set fps for accurate audio mapping on ltx * Added audio processing options to the ui for ltx * Clean up requirements
This commit is contained in:
@@ -15,7 +15,7 @@ export async function POST(request: Request) {
|
||||
}
|
||||
|
||||
// make sure it is an image
|
||||
if (!/\.(jpg|jpeg|png|bmp|gif|tiff|webp)$/i.test(imgPath.toLowerCase())) {
|
||||
if (!/\.(jpg|jpeg|png|bmp|gif|tiff|webp|mp4)$/i.test(imgPath.toLowerCase())) {
|
||||
return NextResponse.json({ error: 'Not an image' }, { status: 400 });
|
||||
}
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ export async function GET(request: NextRequest, { params }: { params: { jobID: s
|
||||
const samples = fs
|
||||
.readdirSync(samplesFolder)
|
||||
.filter(file => {
|
||||
return file.endsWith('.png') || file.endsWith('.jpg') || file.endsWith('.jpeg') || file.endsWith('.webp');
|
||||
return file.endsWith('.png') || file.endsWith('.jpg') || file.endsWith('.jpeg') || file.endsWith('.webp') || file.endsWith('.mp4');
|
||||
})
|
||||
.map(file => {
|
||||
return path.join(samplesFolder, file);
|
||||
|
||||
@@ -862,6 +862,48 @@ export default function SimpleJob({
|
||||
docKey="datasets.do_i2v"
|
||||
/>
|
||||
)}
|
||||
{modelArch?.additionalSections?.includes('datasets.do_audio') && (
|
||||
<Checkbox
|
||||
label="Do Audio"
|
||||
checked={dataset.do_audio || false}
|
||||
onChange={value => {
|
||||
if (!value) {
|
||||
setJobConfig(undefined, `config.process[0].datasets[${i}].do_audio`);
|
||||
} else {
|
||||
setJobConfig(value, `config.process[0].datasets[${i}].do_audio`);
|
||||
}
|
||||
}}
|
||||
docKey="datasets.do_audio"
|
||||
/>
|
||||
)}
|
||||
{modelArch?.additionalSections?.includes('datasets.audio_normalize') && (
|
||||
<Checkbox
|
||||
label="Audio Normalize"
|
||||
checked={dataset.audio_normalize || false}
|
||||
onChange={value => {
|
||||
if (!value) {
|
||||
setJobConfig(undefined, `config.process[0].datasets[${i}].audio_normalize`);
|
||||
} else {
|
||||
setJobConfig(value, `config.process[0].datasets[${i}].audio_normalize`);
|
||||
}
|
||||
}}
|
||||
docKey="datasets.audio_normalize"
|
||||
/>
|
||||
)}
|
||||
{modelArch?.additionalSections?.includes('datasets.audio_preserve_pitch') && (
|
||||
<Checkbox
|
||||
label="Audio Preserve Pitch"
|
||||
checked={dataset.audio_preserve_pitch || false}
|
||||
onChange={value => {
|
||||
if (!value) {
|
||||
setJobConfig(undefined, `config.process[0].datasets[${i}].audio_preserve_pitch`);
|
||||
} else {
|
||||
setJobConfig(value, `config.process[0].datasets[${i}].audio_preserve_pitch`);
|
||||
}
|
||||
}}
|
||||
docKey="datasets.audio_preserve_pitch"
|
||||
/>
|
||||
)}
|
||||
</FormGroup>
|
||||
<FormGroup label="Flipping" docKey={'datasets.flip'} className="mt-2">
|
||||
<Checkbox
|
||||
|
||||
@@ -14,7 +14,6 @@ export const defaultDatasetConfig: DatasetConfig = {
|
||||
controls: [],
|
||||
shrink_video_to_frames: true,
|
||||
num_frames: 1,
|
||||
do_i2v: true,
|
||||
flip_x: false,
|
||||
flip_y: false,
|
||||
};
|
||||
|
||||
@@ -17,6 +17,9 @@ type AdditionalSections =
|
||||
| 'datasets.control_path'
|
||||
| 'datasets.multi_control_paths'
|
||||
| 'datasets.do_i2v'
|
||||
| 'datasets.do_audio'
|
||||
| 'datasets.audio_normalize'
|
||||
| 'datasets.audio_preserve_pitch'
|
||||
| 'sample.ctrl_img'
|
||||
| 'sample.multi_ctrl_imgs'
|
||||
| 'datasets.num_frames'
|
||||
@@ -288,6 +291,7 @@ export const modelArchs: ModelArch[] = [
|
||||
'config.process[0].sample.width': [768, 1024],
|
||||
'config.process[0].sample.height': [768, 1024],
|
||||
'config.process[0].train.timestep_type': ['weighted', 'sigmoid'],
|
||||
'config.process[0].datasets[x].do_i2v': [true, undefined],
|
||||
},
|
||||
disableSections: ['network.conv'],
|
||||
additionalSections: ['sample.ctrl_img', 'datasets.num_frames', 'model.low_vram', 'datasets.do_i2v'],
|
||||
@@ -601,6 +605,31 @@ export const modelArchs: ModelArch[] = [
|
||||
disableSections: ['network.conv'],
|
||||
additionalSections: ['model.low_vram', 'model.layer_offloading'],
|
||||
},
|
||||
{
|
||||
name: 'ltx2',
|
||||
label: 'LTX-2',
|
||||
group: 'video',
|
||||
isVideoModel: true,
|
||||
defaults: {
|
||||
// default updates when [selected, unselected] in the UI
|
||||
'config.process[0].model.name_or_path': ['Lightricks/LTX-2', defaultNameOrPath],
|
||||
'config.process[0].model.quantize': [true, false],
|
||||
'config.process[0].model.quantize_te': [true, false],
|
||||
'config.process[0].model.low_vram': [true, false],
|
||||
'config.process[0].sample.sampler': ['flowmatch', 'flowmatch'],
|
||||
'config.process[0].train.noise_scheduler': ['flowmatch', 'flowmatch'],
|
||||
'config.process[0].sample.num_frames': [121, 1],
|
||||
'config.process[0].sample.fps': [24, 1],
|
||||
'config.process[0].sample.width': [768, 1024],
|
||||
'config.process[0].sample.height': [768, 1024],
|
||||
'config.process[0].train.timestep_type': ['weighted', 'sigmoid'],
|
||||
'config.process[0].datasets[x].do_i2v': [false, undefined],
|
||||
'config.process[0].datasets[x].do_audio': [true, undefined],
|
||||
'config.process[0].datasets[x].fps': [24, undefined],
|
||||
},
|
||||
disableSections: ['network.conv'],
|
||||
additionalSections: ['datasets.num_frames', 'model.layer_offloading', 'model.low_vram', 'datasets.do_audio', 'datasets.audio_normalize', 'datasets.audio_preserve_pitch'],
|
||||
},
|
||||
].sort((a, b) => {
|
||||
// Sort by label, case-insensitive
|
||||
return a.label.localeCompare(b.label, undefined, { sensitivity: 'base' });
|
||||
|
||||
@@ -2,6 +2,25 @@ import { GroupedSelectOption, JobConfig, SelectOption } from '@/types';
|
||||
import { modelArchs, ModelArch } from './options';
|
||||
import { objectCopy } from '@/utils/basic';
|
||||
|
||||
const expandDatasetDefaults = (
|
||||
defaults: { [key: string]: any },
|
||||
numDatasets: number,
|
||||
): { [key: string]: any } => {
|
||||
// expands the defaults for datasets[x] to datasets[0], datasets[1], etc.
|
||||
const expandedDefaults: { [key: string]: any } = { ...defaults };
|
||||
for (const key in defaults) {
|
||||
if (key.includes('datasets[x].')) {
|
||||
for (let i = 0; i < numDatasets; i++) {
|
||||
const datasetKey = key.replace('datasets[x].', `datasets[${i}].`);
|
||||
const v = defaults[key];
|
||||
expandedDefaults[datasetKey] = Array.isArray(v) ? [...v] : objectCopy(v);
|
||||
}
|
||||
delete expandedDefaults[key];
|
||||
}
|
||||
}
|
||||
return expandedDefaults;
|
||||
};
|
||||
|
||||
export const handleModelArchChange = (
|
||||
currentArchName: string,
|
||||
newArchName: string,
|
||||
@@ -39,16 +58,11 @@ export const handleModelArchChange = (
|
||||
}
|
||||
}
|
||||
|
||||
// revert defaults from previous model
|
||||
for (const key in currentArch.defaults) {
|
||||
setJobConfig(currentArch.defaults[key][1], key);
|
||||
}
|
||||
const numDatasets = jobConfig.config.process[0].datasets.length;
|
||||
|
||||
let currentDefaults = expandDatasetDefaults(currentArch.defaults || {}, numDatasets);
|
||||
let newDefaults = expandDatasetDefaults(newArch?.defaults || {}, numDatasets);
|
||||
|
||||
if (newArch?.defaults) {
|
||||
for (const key in newArch.defaults) {
|
||||
setJobConfig(newArch.defaults[key][0], key);
|
||||
}
|
||||
}
|
||||
// set new model
|
||||
setJobConfig(newArchName, 'config.process[0].model.arch');
|
||||
|
||||
@@ -79,27 +93,27 @@ export const handleModelArchChange = (
|
||||
if (newDataset.control_path_1 && newDataset.control_path_1 !== '') {
|
||||
newDataset.control_path = newDataset.control_path_1;
|
||||
}
|
||||
if (newDataset.control_path_1) {
|
||||
if ('control_path_1' in newDataset) {
|
||||
delete newDataset.control_path_1;
|
||||
}
|
||||
if (newDataset.control_path_2) {
|
||||
if ('control_path_2' in newDataset) {
|
||||
delete newDataset.control_path_2;
|
||||
}
|
||||
if (newDataset.control_path_3) {
|
||||
if ('control_path_3' in newDataset) {
|
||||
delete newDataset.control_path_3;
|
||||
}
|
||||
} else {
|
||||
// does not have control images
|
||||
if (newDataset.control_path) {
|
||||
if ('control_path' in newDataset) {
|
||||
delete newDataset.control_path;
|
||||
}
|
||||
if (newDataset.control_path_1) {
|
||||
if ('control_path_1' in newDataset) {
|
||||
delete newDataset.control_path_1;
|
||||
}
|
||||
if (newDataset.control_path_2) {
|
||||
if ('control_path_2' in newDataset) {
|
||||
delete newDataset.control_path_2;
|
||||
}
|
||||
if (newDataset.control_path_3) {
|
||||
if ('control_path_3' in newDataset) {
|
||||
delete newDataset.control_path_3;
|
||||
}
|
||||
}
|
||||
@@ -120,4 +134,13 @@ export const handleModelArchChange = (
|
||||
return newSample;
|
||||
});
|
||||
setJobConfig(samples, 'config.process[0].sample.samples');
|
||||
|
||||
// revert defaults from previous model
|
||||
for (const key in currentDefaults) {
|
||||
setJobConfig(currentDefaults[key][1], key);
|
||||
}
|
||||
|
||||
for (const key in newDefaults) {
|
||||
setJobConfig(newDefaults[key][0], key);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -56,18 +56,6 @@ const SampleImageCard: React.FC<SampleImageCardProps> = ({
|
||||
return () => observer.disconnect();
|
||||
}, [observerRoot, rootMargin]);
|
||||
|
||||
// Pause video when leaving viewport
|
||||
useEffect(() => {
|
||||
if (!isVideo(imageUrl)) return;
|
||||
const v = videoRef.current;
|
||||
if (!v) return;
|
||||
if (!isVisible && !v.paused) {
|
||||
try {
|
||||
v.pause();
|
||||
} catch {}
|
||||
}
|
||||
}, [isVisible, imageUrl]);
|
||||
|
||||
const handleLoad = () => setLoaded(true);
|
||||
|
||||
return (
|
||||
@@ -81,9 +69,11 @@ const SampleImageCard: React.FC<SampleImageCardProps> = ({
|
||||
src={`/api/img/${encodeURIComponent(imageUrl)}`}
|
||||
className="w-full h-full object-cover"
|
||||
preload="none"
|
||||
onLoad={handleLoad}
|
||||
playsInline
|
||||
muted
|
||||
loop
|
||||
autoPlay
|
||||
controls={false}
|
||||
/>
|
||||
) : (
|
||||
|
||||
@@ -7,6 +7,7 @@ import { Cog } from 'lucide-react';
|
||||
import { Menu, MenuButton, MenuItem, MenuItems } from '@headlessui/react';
|
||||
import { openConfirm } from './ConfirmModal';
|
||||
import { apiClient } from '@/utils/api';
|
||||
import { isVideo } from '@/utils/basic';
|
||||
|
||||
interface Props {
|
||||
imgPath: string | null; // current image path
|
||||
@@ -200,13 +201,24 @@ export default function SampleImageViewer({
|
||||
className="relative transform rounded-lg bg-gray-800 text-left shadow-xl transition-all data-closed:translate-y-4 data-closed:opacity-0 data-enter:duration-300 data-enter:ease-out data-leave:duration-200 data-leave:ease-in max-w-[95%] max-h-[95vh] data-closed:sm:translate-y-0 data-closed:sm:scale-95 flex flex-col overflow-hidden"
|
||||
>
|
||||
<div className="overflow-hidden flex items-center justify-center">
|
||||
{imgPath && (
|
||||
<img
|
||||
src={`/api/img/${encodeURIComponent(imgPath)}`}
|
||||
alt="Sample Image"
|
||||
className="w-auto h-auto max-w-[95vw] max-h-[82vh] object-contain"
|
||||
/>
|
||||
)}
|
||||
{imgPath &&
|
||||
(isVideo(imgPath) ? (
|
||||
<video
|
||||
src={`/api/img/${encodeURIComponent(imgPath)}`}
|
||||
className="w-auto h-auto max-w-[95vw] max-h-[82vh] object-contain"
|
||||
preload="none"
|
||||
playsInline
|
||||
loop
|
||||
autoPlay
|
||||
controls={true}
|
||||
/>
|
||||
) : (
|
||||
<img
|
||||
src={`/api/img/${encodeURIComponent(imgPath)}`}
|
||||
alt="Sample Image"
|
||||
className="w-auto h-auto max-w-[95vw] max-h-[82vh] object-contain"
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
{/* # make full width */}
|
||||
<div className="bg-gray-950 text-sm flex justify-between items-center px-4 py-2">
|
||||
|
||||
@@ -107,6 +107,35 @@ const docs: { [key: string]: ConfigDoc } = {
|
||||
</>
|
||||
),
|
||||
},
|
||||
'datasets.do_audio': {
|
||||
title: 'Do Audio',
|
||||
description: (
|
||||
<>
|
||||
For models that support audio with video, this option will load the audio from the video and resize it to match
|
||||
the video sequence. Since the video is automatically resized, the audio may drop or raise in pitch to match the new
|
||||
speed of the video. It is important to prep your dataset to have the proper length before training.
|
||||
</>
|
||||
),
|
||||
},
|
||||
'datasets.audio_normalize': {
|
||||
title: 'Audio Normalize',
|
||||
description: (
|
||||
<>
|
||||
When loading audio, this will normalize the audio volume to the max peaks. Useful if your dataset has varying audio
|
||||
volumes. Warning, do not use if you have clips with full silence you want to keep, as it will raise the volume of those clips.
|
||||
</>
|
||||
),
|
||||
},
|
||||
'datasets.audio_preserve_pitch': {
|
||||
title: 'Audio Preserve Pitch',
|
||||
description: (
|
||||
<>
|
||||
When loading audio to match the number of frames requested, this option will preserve the pitch of the audio if
|
||||
the length does not match training target. It is recommended to have a dataset that matches your target length,
|
||||
as this option can add sound distortions.
|
||||
</>
|
||||
),
|
||||
},
|
||||
'datasets.flip': {
|
||||
title: 'Flip X and Flip Y',
|
||||
description: (
|
||||
|
||||
@@ -96,7 +96,11 @@ export interface DatasetConfig {
|
||||
control_path?: string | null;
|
||||
num_frames: number;
|
||||
shrink_video_to_frames: boolean;
|
||||
do_i2v: boolean;
|
||||
do_i2v?: boolean;
|
||||
do_audio?: boolean;
|
||||
audio_normalize?: boolean;
|
||||
audio_preserve_pitch?: boolean;
|
||||
fps?: number;
|
||||
flip_x: boolean;
|
||||
flip_y: boolean;
|
||||
control_path_1?: string | null;
|
||||
|
||||
@@ -17,21 +17,14 @@ export function setNestedValue<T, V>(obj: T, value: V, path?: string): T {
|
||||
}
|
||||
|
||||
// Split the path into segments
|
||||
const pathArray = path.split('.').flatMap(segment => {
|
||||
// Handle array notation like 'process[0]'
|
||||
const arrayMatch = segment.match(/^([^\[]+)(\[\d+\])+/);
|
||||
if (arrayMatch) {
|
||||
const propName = arrayMatch[1];
|
||||
const indices = segment
|
||||
.substring(propName.length)
|
||||
.match(/\[(\d+)\]/g)
|
||||
?.map(idx => parseInt(idx.substring(1, idx.length - 1)));
|
||||
const pathArray: Array<string | number> = [];
|
||||
const re = /([^[.\]]+)|\[(\d+)\]/g;
|
||||
let m: RegExpExecArray | null;
|
||||
|
||||
// Return property name followed by array indices
|
||||
return [propName, ...(indices || [])];
|
||||
}
|
||||
return segment;
|
||||
});
|
||||
while ((m = re.exec(path)) !== null) {
|
||||
if (m[1] !== undefined) pathArray.push(m[1]);
|
||||
else pathArray.push(Number(m[2]));
|
||||
}
|
||||
|
||||
// Navigate to the target location
|
||||
let current: any = result;
|
||||
@@ -43,8 +36,18 @@ export function setNestedValue<T, V>(obj: T, value: V, path?: string): T {
|
||||
if (!Array.isArray(current)) {
|
||||
throw new Error(`Cannot access index ${key} of non-array`);
|
||||
}
|
||||
// Create a copy of the array to maintain immutability
|
||||
current = [...current];
|
||||
|
||||
// Ensure the indexed element exists and is copied/created immutably
|
||||
const nextKey = pathArray[i + 1];
|
||||
const existing = current[key];
|
||||
|
||||
if (existing === undefined) {
|
||||
current[key] = typeof nextKey === 'number' ? [] : {};
|
||||
} else if (Array.isArray(existing)) {
|
||||
current[key] = [...existing];
|
||||
} else if (typeof existing === 'object' && existing !== null) {
|
||||
current[key] = { ...existing };
|
||||
} // else: primitives stay as-is
|
||||
} else {
|
||||
// For object properties, create a new object if it doesn't exist
|
||||
if (current[key] === undefined) {
|
||||
@@ -63,7 +66,11 @@ export function setNestedValue<T, V>(obj: T, value: V, path?: string): T {
|
||||
|
||||
// Set the value at the final path segment
|
||||
const finalKey = pathArray[pathArray.length - 1];
|
||||
current[finalKey] = value;
|
||||
if (value === undefined) {
|
||||
delete current[finalKey];
|
||||
} else {
|
||||
current[finalKey] = value;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user