Add LTX-2 Support (#644)

* WIP, adding support for LTX2

* Training on images working

* Fix loading comfy models

* Handle converting and deconverting lora so it matches original format

* Reworked UI to handle LTX and proper dataset default overwriting.

* Update the way lokr saves so it is more compatible with comfy

* Audio loading and synchronization/resampling is working

* Add audio to training. Does it work? Maybe, still testing.

* Fixed fps default issue for sound

* Have ui set fps for accurate audio mapping on ltx

* Added audio processing options to the UI for LTX

* Clean up requirements
This commit is contained in:
Jaret Burkett
2026-01-13 04:55:30 -07:00
committed by GitHub
parent 6870ab490f
commit 5b5aadadb8
28 changed files with 2180 additions and 71 deletions

View File

@@ -15,7 +15,7 @@ export async function POST(request: Request) {
}
// make sure it is an image
if (!/\.(jpg|jpeg|png|bmp|gif|tiff|webp)$/i.test(imgPath.toLowerCase())) {
if (!/\.(jpg|jpeg|png|bmp|gif|tiff|webp|mp4)$/i.test(imgPath.toLowerCase())) {
return NextResponse.json({ error: 'Not an image' }, { status: 400 });
}

View File

@@ -29,7 +29,7 @@ export async function GET(request: NextRequest, { params }: { params: { jobID: s
const samples = fs
.readdirSync(samplesFolder)
.filter(file => {
return file.endsWith('.png') || file.endsWith('.jpg') || file.endsWith('.jpeg') || file.endsWith('.webp');
return file.endsWith('.png') || file.endsWith('.jpg') || file.endsWith('.jpeg') || file.endsWith('.webp') || file.endsWith('.mp4');
})
.map(file => {
return path.join(samplesFolder, file);

View File

@@ -862,6 +862,48 @@ export default function SimpleJob({
docKey="datasets.do_i2v"
/>
)}
{modelArch?.additionalSections?.includes('datasets.do_audio') && (
<Checkbox
label="Do Audio"
checked={dataset.do_audio || false}
onChange={value => {
if (!value) {
setJobConfig(undefined, `config.process[0].datasets[${i}].do_audio`);
} else {
setJobConfig(value, `config.process[0].datasets[${i}].do_audio`);
}
}}
docKey="datasets.do_audio"
/>
)}
{modelArch?.additionalSections?.includes('datasets.audio_normalize') && (
<Checkbox
label="Audio Normalize"
checked={dataset.audio_normalize || false}
onChange={value => {
if (!value) {
setJobConfig(undefined, `config.process[0].datasets[${i}].audio_normalize`);
} else {
setJobConfig(value, `config.process[0].datasets[${i}].audio_normalize`);
}
}}
docKey="datasets.audio_normalize"
/>
)}
{modelArch?.additionalSections?.includes('datasets.audio_preserve_pitch') && (
<Checkbox
label="Audio Preserve Pitch"
checked={dataset.audio_preserve_pitch || false}
onChange={value => {
if (!value) {
setJobConfig(undefined, `config.process[0].datasets[${i}].audio_preserve_pitch`);
} else {
setJobConfig(value, `config.process[0].datasets[${i}].audio_preserve_pitch`);
}
}}
docKey="datasets.audio_preserve_pitch"
/>
)}
</FormGroup>
<FormGroup label="Flipping" docKey={'datasets.flip'} className="mt-2">
<Checkbox

View File

@@ -14,7 +14,6 @@ export const defaultDatasetConfig: DatasetConfig = {
controls: [],
shrink_video_to_frames: true,
num_frames: 1,
do_i2v: true,
flip_x: false,
flip_y: false,
};

View File

@@ -17,6 +17,9 @@ type AdditionalSections =
| 'datasets.control_path'
| 'datasets.multi_control_paths'
| 'datasets.do_i2v'
| 'datasets.do_audio'
| 'datasets.audio_normalize'
| 'datasets.audio_preserve_pitch'
| 'sample.ctrl_img'
| 'sample.multi_ctrl_imgs'
| 'datasets.num_frames'
@@ -288,6 +291,7 @@ export const modelArchs: ModelArch[] = [
'config.process[0].sample.width': [768, 1024],
'config.process[0].sample.height': [768, 1024],
'config.process[0].train.timestep_type': ['weighted', 'sigmoid'],
'config.process[0].datasets[x].do_i2v': [true, undefined],
},
disableSections: ['network.conv'],
additionalSections: ['sample.ctrl_img', 'datasets.num_frames', 'model.low_vram', 'datasets.do_i2v'],
@@ -601,6 +605,31 @@ export const modelArchs: ModelArch[] = [
disableSections: ['network.conv'],
additionalSections: ['model.low_vram', 'model.layer_offloading'],
},
{
name: 'ltx2',
label: 'LTX-2',
group: 'video',
isVideoModel: true,
defaults: {
// default updates when [selected, unselected] in the UI
'config.process[0].model.name_or_path': ['Lightricks/LTX-2', defaultNameOrPath],
'config.process[0].model.quantize': [true, false],
'config.process[0].model.quantize_te': [true, false],
'config.process[0].model.low_vram': [true, false],
'config.process[0].sample.sampler': ['flowmatch', 'flowmatch'],
'config.process[0].train.noise_scheduler': ['flowmatch', 'flowmatch'],
'config.process[0].sample.num_frames': [121, 1],
'config.process[0].sample.fps': [24, 1],
'config.process[0].sample.width': [768, 1024],
'config.process[0].sample.height': [768, 1024],
'config.process[0].train.timestep_type': ['weighted', 'sigmoid'],
'config.process[0].datasets[x].do_i2v': [false, undefined],
'config.process[0].datasets[x].do_audio': [true, undefined],
'config.process[0].datasets[x].fps': [24, undefined],
},
disableSections: ['network.conv'],
additionalSections: ['datasets.num_frames', 'model.layer_offloading', 'model.low_vram', 'datasets.do_audio', 'datasets.audio_normalize', 'datasets.audio_preserve_pitch'],
},
].sort((a, b) => {
// Sort by label, case-insensitive
return a.label.localeCompare(b.label, undefined, { sensitivity: 'base' });

View File

@@ -2,6 +2,25 @@ import { GroupedSelectOption, JobConfig, SelectOption } from '@/types';
import { modelArchs, ModelArch } from './options';
import { objectCopy } from '@/utils/basic';
// Expand wildcard `datasets[x]` keys in a defaults map into concrete
// per-dataset keys (`datasets[0]`, `datasets[1]`, ...), one entry per dataset.
// The wildcard key itself is removed from the result; each expanded entry
// receives its own copy of the value so datasets never share a reference.
const expandDatasetDefaults = (
  defaults: { [key: string]: any },
  numDatasets: number,
): { [key: string]: any } => {
  const expanded: { [key: string]: any } = { ...defaults };
  Object.keys(defaults)
    .filter(key => key.includes('datasets[x].'))
    .forEach(wildcardKey => {
      for (let idx = 0; idx < numDatasets; idx++) {
        const concreteKey = wildcardKey.replace('datasets[x].', `datasets[${idx}].`);
        const value = defaults[wildcardKey];
        // Copy the value (shallow for arrays, deep-ish via objectCopy otherwise)
        // so each dataset gets an independent instance.
        expanded[concreteKey] = Array.isArray(value) ? [...value] : objectCopy(value);
      }
      delete expanded[wildcardKey];
    });
  return expanded;
};
export const handleModelArchChange = (
currentArchName: string,
newArchName: string,
@@ -39,16 +58,11 @@ export const handleModelArchChange = (
}
}
// revert defaults from previous model
for (const key in currentArch.defaults) {
setJobConfig(currentArch.defaults[key][1], key);
}
const numDatasets = jobConfig.config.process[0].datasets.length;
let currentDefaults = expandDatasetDefaults(currentArch.defaults || {}, numDatasets);
let newDefaults = expandDatasetDefaults(newArch?.defaults || {}, numDatasets);
if (newArch?.defaults) {
for (const key in newArch.defaults) {
setJobConfig(newArch.defaults[key][0], key);
}
}
// set new model
setJobConfig(newArchName, 'config.process[0].model.arch');
@@ -79,27 +93,27 @@ export const handleModelArchChange = (
if (newDataset.control_path_1 && newDataset.control_path_1 !== '') {
newDataset.control_path = newDataset.control_path_1;
}
if (newDataset.control_path_1) {
if ('control_path_1' in newDataset) {
delete newDataset.control_path_1;
}
if (newDataset.control_path_2) {
if ('control_path_2' in newDataset) {
delete newDataset.control_path_2;
}
if (newDataset.control_path_3) {
if ('control_path_3' in newDataset) {
delete newDataset.control_path_3;
}
} else {
// does not have control images
if (newDataset.control_path) {
if ('control_path' in newDataset) {
delete newDataset.control_path;
}
if (newDataset.control_path_1) {
if ('control_path_1' in newDataset) {
delete newDataset.control_path_1;
}
if (newDataset.control_path_2) {
if ('control_path_2' in newDataset) {
delete newDataset.control_path_2;
}
if (newDataset.control_path_3) {
if ('control_path_3' in newDataset) {
delete newDataset.control_path_3;
}
}
@@ -120,4 +134,13 @@ export const handleModelArchChange = (
return newSample;
});
setJobConfig(samples, 'config.process[0].sample.samples');
// revert defaults from previous model
for (const key in currentDefaults) {
setJobConfig(currentDefaults[key][1], key);
}
for (const key in newDefaults) {
setJobConfig(newDefaults[key][0], key);
}
};

View File

@@ -56,18 +56,6 @@ const SampleImageCard: React.FC<SampleImageCardProps> = ({
return () => observer.disconnect();
}, [observerRoot, rootMargin]);
// Pause video when leaving viewport
useEffect(() => {
if (!isVideo(imageUrl)) return;
const v = videoRef.current;
if (!v) return;
if (!isVisible && !v.paused) {
try {
v.pause();
} catch {}
}
}, [isVisible, imageUrl]);
const handleLoad = () => setLoaded(true);
return (
@@ -81,9 +69,11 @@ const SampleImageCard: React.FC<SampleImageCardProps> = ({
src={`/api/img/${encodeURIComponent(imageUrl)}`}
className="w-full h-full object-cover"
preload="none"
onLoad={handleLoad}
playsInline
muted
loop
autoPlay
controls={false}
/>
) : (

View File

@@ -7,6 +7,7 @@ import { Cog } from 'lucide-react';
import { Menu, MenuButton, MenuItem, MenuItems } from '@headlessui/react';
import { openConfirm } from './ConfirmModal';
import { apiClient } from '@/utils/api';
import { isVideo } from '@/utils/basic';
interface Props {
imgPath: string | null; // current image path
@@ -200,13 +201,24 @@ export default function SampleImageViewer({
className="relative transform rounded-lg bg-gray-800 text-left shadow-xl transition-all data-closed:translate-y-4 data-closed:opacity-0 data-enter:duration-300 data-enter:ease-out data-leave:duration-200 data-leave:ease-in max-w-[95%] max-h-[95vh] data-closed:sm:translate-y-0 data-closed:sm:scale-95 flex flex-col overflow-hidden"
>
<div className="overflow-hidden flex items-center justify-center">
{imgPath && (
<img
src={`/api/img/${encodeURIComponent(imgPath)}`}
alt="Sample Image"
className="w-auto h-auto max-w-[95vw] max-h-[82vh] object-contain"
/>
)}
{imgPath &&
(isVideo(imgPath) ? (
<video
src={`/api/img/${encodeURIComponent(imgPath)}`}
className="w-auto h-auto max-w-[95vw] max-h-[82vh] object-contain"
preload="none"
playsInline
loop
autoPlay
controls={true}
/>
) : (
<img
src={`/api/img/${encodeURIComponent(imgPath)}`}
alt="Sample Image"
className="w-auto h-auto max-w-[95vw] max-h-[82vh] object-contain"
/>
))}
</div>
{/* # make full width */}
<div className="bg-gray-950 text-sm flex justify-between items-center px-4 py-2">

View File

@@ -107,6 +107,35 @@ const docs: { [key: string]: ConfigDoc } = {
</>
),
},
'datasets.do_audio': {
title: 'Do Audio',
description: (
<>
For models that support audio with video, this option will load the audio from the video and resize it to match
the video sequence. Since the video is automatically resized, the audio may drop or raise in pitch to match the new
speed of the video. It is important to prep your dataset to have the proper length before training.
</>
),
},
'datasets.audio_normalize': {
title: 'Audio Normalize',
description: (
<>
When loading audio, this will normalize the audio volume to the max peaks. Useful if your dataset has varying audio
volumes. Warning, do not use if you have clips with full silence you want to keep, as it will raise the volume of those clips.
</>
),
},
'datasets.audio_preserve_pitch': {
title: 'Audio Preserve Pitch',
description: (
<>
When loading audio to match the number of frames requested, this option will preserve the pitch of the audio if
the length does not match training target. It is recommended to have a dataset that matches your target length,
as this option can add sound distortions.
</>
),
},
'datasets.flip': {
title: 'Flip X and Flip Y',
description: (

View File

@@ -96,7 +96,11 @@ export interface DatasetConfig {
control_path?: string | null;
num_frames: number;
shrink_video_to_frames: boolean;
do_i2v: boolean;
do_i2v?: boolean;
do_audio?: boolean;
audio_normalize?: boolean;
audio_preserve_pitch?: boolean;
fps?: number;
flip_x: boolean;
flip_y: boolean;
control_path_1?: string | null;

View File

@@ -17,21 +17,14 @@ export function setNestedValue<T, V>(obj: T, value: V, path?: string): T {
}
// Split the path into segments
const pathArray = path.split('.').flatMap(segment => {
// Handle array notation like 'process[0]'
const arrayMatch = segment.match(/^([^\[]+)(\[\d+\])+/);
if (arrayMatch) {
const propName = arrayMatch[1];
const indices = segment
.substring(propName.length)
.match(/\[(\d+)\]/g)
?.map(idx => parseInt(idx.substring(1, idx.length - 1)));
const pathArray: Array<string | number> = [];
const re = /([^[.\]]+)|\[(\d+)\]/g;
let m: RegExpExecArray | null;
// Return property name followed by array indices
return [propName, ...(indices || [])];
}
return segment;
});
while ((m = re.exec(path)) !== null) {
if (m[1] !== undefined) pathArray.push(m[1]);
else pathArray.push(Number(m[2]));
}
// Navigate to the target location
let current: any = result;
@@ -43,8 +36,18 @@ export function setNestedValue<T, V>(obj: T, value: V, path?: string): T {
if (!Array.isArray(current)) {
throw new Error(`Cannot access index ${key} of non-array`);
}
// Create a copy of the array to maintain immutability
current = [...current];
// Ensure the indexed element exists and is copied/created immutably
const nextKey = pathArray[i + 1];
const existing = current[key];
if (existing === undefined) {
current[key] = typeof nextKey === 'number' ? [] : {};
} else if (Array.isArray(existing)) {
current[key] = [...existing];
} else if (typeof existing === 'object' && existing !== null) {
current[key] = { ...existing };
} // else: primitives stay as-is
} else {
// For object properties, create a new object if it doesn't exist
if (current[key] === undefined) {
@@ -63,7 +66,11 @@ export function setNestedValue<T, V>(obj: T, value: V, path?: string): T {
// Set the value at the final path segment
const finalKey = pathArray[pathArray.length - 1];
current[finalKey] = value;
if (value === undefined) {
delete current[finalKey];
} else {
current[finalKey] = value;
}
return result;
}